Ejemplo n.º 1
0
def race_set_runner_trial():
    """Run a ``RaceSetRunner`` over one of the sample data directories.

    Builds the races dataframe with ``DataGetter``, merges the ``Date`` and
    ``Race_Time`` text columns into a single ``Race_Date_Time`` datetime
    column, and hands the result to ``RaceSetRunner.mainline``. The value
    returned by ``mainline`` is discarded.
    """
    # Other sample directories that have been used with this trial:
    #   'fulldata/2005', 'code_test_1_month', 'code_test_3_months',
    #   'code_test_many_events', 'code_test_single_event',
    #   'code_test_single_race'
    relevant_dir = 'test_1'

    data_getter = DataGetter(relevant_dir)
    data_getter.format_data()
    df = data_getter.get_racesdataframe()

    # TODO: apply this in DataGetter when the dataframe is formed.
    # Column-wise concatenation avoids a Python-level call per row.
    df['Race_Date_Time'] = pd.to_datetime(
        df['Date'] + ' ' + df['Race_Time'], format='%d/%m/%Y %H:%M')
    df = df.drop(['Date', 'Race_Time'], axis=1)

    race_runner_parameter_dict = {'min_num_race_results': 10000}

    # Empty lists and None dates are passed as-is to RaceSetRunner.
    # A restricted trial that was used previously looked like:
    #   'Track_list': ['Crayford', 'Coventry'],
    #   'Grade_list': ['HP', 'A8', 'A7', 'A6', 'A5', 'A4', 'A3', 'A2',
    #                  'A1', 'IM'],
    #   'Start_Date': datetime.datetime(2005, 1, 1),
    #   'End_Date': datetime.datetime(2005, 1, 3),
    #   'num_sims': 10
    trial_parameters = {
        'Track_list': [],
        'Grade_list': [],
        'Start_Date': None,
        'End_Date': None,
        'num_sims': 10000,
    }
    race_runner_object = None

    race_set_runner = RaceSetRunner(
        df, trial_parameters, race_runner_object, race_runner_parameter_dict)
    race_set_runner.mainline()
Ejemplo n.º 2
0
    def mainline(self):
        '''
        Run every trial in the experiment design and save the combined results.

        The race data comes from ``self.data_dir_path``. When
        ``ut.get_shallow_csv_file_list`` reports exactly one CSV file there,
        the data is loaded with ``DataGetter.load_csv``; otherwise it is built
        with ``DataGetter.format_data`` and saved with ``DataGetter.write_csv``.

        Each definition in ``self.experiment_design`` is run through a
        ``TrialRunner``. The resulting tables get a 1-based ``trial`` column
        (numbered in iteration order), are concatenated, written to
        ``self.results_write_file_path`` as UTF-8 CSV and returned.

        Raises:
            ValueError: if ``self.experiment_design`` contains no trials.
        '''
        # Imported locally so this method does not rely on a module-level
        # pandas alias being present.
        import pandas as pd

        data_file_list = ut.get_shallow_csv_file_list(self.data_dir_path)
        data_getter = DataGetter(self.data_dir_path)
        if len(data_file_list) != 1:
            # No single cached CSV: build the data and cache it for next time.
            data_getter.format_data()
            data_getter.write_csv()
        else:
            data_getter.load_csv()
        self.data_df = data_getter.get_racesdataframe()

        trial_results = []
        for _trial_ref, trial_definition in self.experiment_design.items():
            trial_runner = TrialRunner(self.data_df, trial_definition)
            trial_results.append(trial_runner.mainline())

        if not trial_results:
            raise ValueError('experiment_design contains no trials to run')

        # Tag each table with its trial number, then join them in one pass
        # instead of growing a dataframe with repeated appends.
        for trial_number, result_table in enumerate(trial_results, 1):
            result_table['trial'] = trial_number
        experiment_results = pd.concat(trial_results)

        experiment_results.to_csv(self.results_write_file_path, encoding='utf-8')
        print('Experiment Completed')
        return experiment_results
Ejemplo n.º 3
0
def trial_RaceRunner():
    """Run a ``RaceRunner`` simulation for the latest race in a sample set.

    Builds the races dataframe with ``DataGetter``, merges the ``Date`` and
    ``Race_Time`` text columns into a single ``Race_Date_Time`` datetime
    column, lists the distinct (track, start time) pairs and simulates the
    one with the latest start time. The shape and head of the result are
    printed.

    Raises:
        ValueError: if the dataframe contains no races.
    """
    # Other sample directories that have been used with this trial:
    #   'fulldata/2005', 'test_1', 'code_test_1_month',
    #   'code_test_many_events', 'code_test_single_event',
    #   'code_test_single_race'
    relevant_dir = 'code_test_3_months'

    data_getter = DataGetter(relevant_dir)
    data_getter.format_data()
    df = data_getter.get_racesdataframe()

    # TODO: apply this in DataGetter when the dataframe is formed.
    # Column-wise concatenation avoids a Python-level call per row.
    df['Race_Date_Time'] = pd.to_datetime(
        df['Date'] + ' ' + df['Race_Time'], format='%d/%m/%Y %H:%M')
    df = df.drop(['Date', 'Race_Time'], axis=1)

    # One row per distinct race, ordered by start time.
    races = df[['Track', 'Race_Date_Time']].drop_duplicates()
    races = races.sort_values('Race_Date_Time', ascending=True)
    if races.empty:
        raise ValueError('no races found in %r' % relevant_dir)

    # Only the last race in start-time order is simulated.
    last_race = races.iloc[-1]
    race_reference_dict = {
        'Track': last_race['Track'],
        'Race_Date_Time': last_race['Race_Date_Time'],
    }
    num_sims = 10000
    parameter_dict = {'min_num_race_results': 10}

    race_runner = RaceRunner(df, race_reference_dict, num_sims, parameter_dict)
    results = race_runner.main_line()
    # Formatted so the output is the same under Python 2 and Python 3.
    print('results_shape %s' % (results.shape,))
    print(results.head())