def plot_time_distribution_by_bib_numbers(df):
    '''
    Draw a scatter plot of running time against BIB number and show it.

    Dashed blue vertical lines are drawn at a fixed set of BIB numbers and
    three distance labels ('10 km', '21 km', '42 km') are written at y=28000.
    The y tick labels are produced by study_utils.convert_seconds_to_time.

    Parameters
        - df: DataFrame providing a 'number' column (x axis) and a 'time'
          column (y axis)
    '''
    x_column, y_column = 'number', 'time'
    # BIB numbers at which a dashed separator is drawn
    separators = (-200, 2000, 8800, 9800, 17000)
    # (label text, x position of the text)
    distance_labels = (('10 km', 12500), ('21 km', 4500), ('42 km', 150))

    ax = df.plot(kind='scatter', x=x_column, y=y_column, xlim=(-1000, 18000))
    ax.set(xlabel=x_column.capitalize(), ylabel=y_column.capitalize())

    for position in separators:
        ax.axvline(position, color='b', linestyle='--')

    for text, x_text in distance_labels:
        # No arrow is requested, so only the text placed at (x_text, 28000) is drawn
        ax.annotate(text, (0, 0), (x_text, 28000), color='b')

    tick_values = ax.get_yticks()
    tick_labels = [
        study_utils.convert_seconds_to_time(value)
        for value in ax.get_yticks()
    ]
    plt.yticks(tick_values, tick_labels)
    plt.title('Running time according to BIB number of participants')
    plt.show()
def plot_time_difference_distribution(data):
    '''
    Display a histogram of the time difference between team members and the
    best time of their team.

    A dashed red vertical line marks the mean difference, and a text box in
    the upper right corner reports the mean and the maximum difference.
    Tick labels and statistics are formatted with
    study_utils.convert_seconds_to_time.

    NOTE: another function named plot_time_difference_distribution is defined
    further down in this file. If both live in the same module, the later
    definition replaces this one at import time.

    Parameters
        - data: DataFrame containing a 'time difference team' column
          (presumably expressed in seconds - to be confirmed against
          study_utils.convert_seconds_to_time)
    '''
    differences = data['time difference team']
    ax = differences.hist(bins=30, figsize=(10, 6))

    # Statistics shown on the graph
    mean = np.mean(differences)
    max_time_diff = np.max(differences)

    # Mean marker. The ymin/ymax arguments of axvline are fractions of the
    # axes height, so the defaults (0 and 1) already span the whole plot.
    ax.axvline(mean, color='r', linestyle='--')
    ax.set_title(
        'Distribution of runners according to difference of time with the best runner in team'
    )
    ax.set_xlabel('Difference of time')
    ax.set_ylabel('Number of runners')

    # Relabel the x ticks on this axes directly, rather than on whichever
    # axes pyplot currently considers active
    tick_values = ax.get_xticks()
    ax.set_xticks(tick_values)
    ax.set_xticklabels([
        study_utils.convert_seconds_to_time(value) for value in tick_values
    ])

    # Summary of the time difference statistics
    time_stats = (
        'Mean difference of time: '
        + study_utils.convert_seconds_to_time(mean)
        + '\n'
        + 'Maximum difference of time: '
        + study_utils.convert_seconds_to_time(max_time_diff)
    )
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(.95, .95, time_stats,
            fontsize=11,
            transform=ax.transAxes,
            va='top',
            ha='right',
            bbox=props,
            multialignment='left')
def plot_time_distribution(ax, running, name):
    '''
    Fill a subplot with the stacked time distribution of a set of runners,
    split by running distance (10 km, 21 km, 42 km).

    A text box in the upper left corner gives the number of runners per
    distance and the total number of rows in `running`.

    Parameters
        - ax: subplot to use for the histogram (Matplotlib axes)
        - running: data for a given set of participants; must support boolean
          row selection and provide 'distance (km)' and 'time' columns
          (presumably one group of a DataFrame groupby - to be confirmed
          against the caller)
        - name: name of the category, used in the subplot title
    '''
    distances = (10, 21, 42)
    labels = [str(distance) + ' km' for distance in distances]

    # One list of times per distance, in the same order as `labels`
    times_by_distance = [
        running.loc[running['distance (km)'] == distance, 'time'].tolist()
        for distance in distances
    ]

    # Creation of histogram
    ax.hist(times_by_distance,
            bins=30,
            stacked=True,
            rwidth=1.0,
            label=labels)
    ax.legend()
    ax.set_ylabel('Number of Runners')
    ax.set_title('Time distribution (' + str(name) + ')')
    ax.xaxis.set_label_coords(1.15, -0.025)

    # Creation of texts
    stats_lines = [
        label + ': ' + str(len(times)) + ' runners'
        for label, times in zip(labels, times_by_distance)
    ]
    stats_lines.append('Total: ' + str(len(running['time'])) + ' runners')
    stats_str = '\n'.join(stats_lines)

    # Relabel the ticks of the axes received as argument. pyplot's xticks()
    # would act on the *current* axes, which is not necessarily `ax` when
    # several subplots exist.
    tick_values = ax.get_xticks()
    ax.set_xticks(tick_values)
    ax.set_xticklabels(
        [study_utils.convert_seconds_to_time(value) for value in tick_values],
        rotation=90)

    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.annotate(stats_str,
                xy=(0, 1),
                xytext=(12, -12),
                va='top',
                xycoords='axes fraction',
                textcoords='offset points',
                bbox=props)
def plot_scatter_difference_time_number(fig,
                                        data,
                                        distance,
                                        subplot_idx,
                                        annotation=None,
                                        time_mini=1000):
    '''
    Plot, for one running distance, the time difference between late team
    members and the best time of their team, as a swarm plot with one column
    per team.

    Parameters
        - fig: Figure on which subplots are displayed
        - data: DataFrame containing the data relative to a given running;
          the 'team', 'distance (km)', 'time difference team' and 'sex'
          columns are used
        - distance: number of kilometers of the considered running (10/21/42)
        - subplot_idx: Index of the subplot in the figure (311, 312 and 313
          receive specific legend / axis labels)
        - annotation: Optional sequence (subplot index, text, xy, xytext);
          the annotation is drawn only on the subplot whose index matches
          (by default, no annotation)
        - time_mini: Minimal time difference to consider; only runners whose
          difference is strictly greater are kept (1000 by default)
    '''
    # Select runners in teams and with the selected distance
    in_team_for_distance = (data['team'].notnull()
                            & (data['distance (km)'] == distance))
    race_team = data[in_team_for_distance]

    # Remove times which are not greater than the minimal time considered
    race_team = race_team[race_team['time difference team'] > time_mini]

    # Keep only teams still represented by at least two runners. The copy
    # makes the column assignment below act on an independent frame instead
    # of a slice of `data`.
    race_team = race_team[race_team['team'].duplicated(keep=False)].copy()

    # Map team name with team number
    team_label_encode = preprocessing.LabelEncoder()
    race_team['team_code'] = team_label_encode.fit_transform(race_team['team'])

    # Computation of runners in pair: one value per row, then counted
    number_runner_in_pair = race_team.apply(study_utils.compute_pair_runner,
                                            args=(race_team, 60),
                                            axis=1)
    counter_pair = Counter(number_runner_in_pair)

    # Plotting the results
    plot = fig.add_subplot(subplot_idx)
    sns.swarmplot(x='team_code',
                  y='time difference team',
                  hue='sex',
                  data=race_team,
                  ax=plot)
    plot.set_title('Distance: ' + str(distance) + ' km')
    plot.set_xlabel('')
    plot.set_ylabel('')
    plot.legend(loc='upper left')

    # Add annotation if any given for this subplot
    has_annotation = annotation is not None and len(annotation) != 0
    if has_annotation and subplot_idx == annotation[0]:
        plot.annotate(annotation[1],
                      annotation[2],
                      annotation[3],
                      arrowprops=dict(facecolor='red', shrink=0.05))

    # Manage legends: only the first subplot keeps its legend
    if subplot_idx != 311 and plot.legend_ is not None:
        plot.legend_.remove()
    if subplot_idx == 312:
        plot.set_ylabel('Difference of time with the best runner in the team')
    if subplot_idx == 313:
        plot.set_xlabel('Team number')

    # Relabel the y ticks of this subplot directly; pyplot's yticks() acts on
    # the current axes of the current figure, which may not be `plot`.
    tick_values = plot.get_yticks()
    plot.set_yticks(tick_values)
    plot.set_yticklabels([
        study_utils.convert_seconds_to_time(value) for value in tick_values
    ])

    display_legend(counter_pair, plot)
def plot_time_difference_distribution(
        df,
        title='Time difference with the best runner in team',
        time_difference_column_name='time difference team'):
    '''
    Build and display a Plotly histogram of the time difference between the
    performance of team members and the best performance within their team.

    The mean difference is marked by a dashed vertical line and the mean and
    maximum differences are written in an annotation. The input DataFrame is
    not modified: the conversion to datetimes is done on a copy.

    Parameters
        - df: DataFrame containing information on runners
        - title: Title of the graph (by default, 'Time difference with the
          best runner in team')
        - time_difference_column_name: Name of column containing time
          differences (by default, 'time difference team')

    Return
        - figure: Plotly figure
    '''
    to_time = study_utils.convert_seconds_to_time
    time_format = '%H:%M:%S'
    column = time_difference_column_name

    # Statistics computed on the raw values of the column
    mean_difference = np.mean(df[column])
    mean_difference_dt = datetime.datetime.strptime(to_time(mean_difference),
                                                    time_format)
    max_difference = np.max(df[column])
    statistics = '<br>'.join([
        'Mean difference of time: ' + str(to_time(mean_difference)),
        'Maximum difference of time: ' + str(to_time(max_difference)),
    ])

    # The x axis is of type 'date', so the values are converted to datetimes
    data = df.copy()
    data[column] = pd.to_datetime(
        [to_time(value) for value in data[column]], format=time_format)
    converted = data[column]

    # Bin size is 5 * 60000, i.e. five minutes if the unit is milliseconds
    # (presumably the case for a date axis - to be confirmed)
    bins = {
        'start': np.min(converted),
        'end': np.max(converted),
        'size': 5 * 60000
    }
    histogram = [go.Histogram(x=converted, xbins=bins)]

    annotations = [
        Annotation(y=1,
                   x=1,
                   text=statistics,
                   align='right',
                   xref='paper',
                   yref='paper',
                   showarrow=False)
    ]

    # Vertical line over the full height of the plotting area at the mean
    mean_line = {
        'type': 'line',
        'yref': 'paper',
        'x0': mean_difference_dt,
        'y0': 0,
        'x1': mean_difference_dt,
        'y1': 1,
        'line': {
            'color': '#f44242',
            'width': 2,
            'dash': 'dash'
        }
    }

    figure = study_utils.create_plotly_legends_and_layout(
        histogram,
        title=title,
        x_name='Performance gap',
        x_type='date',
        x_format=time_format,
        y_name='Number of runners',
        barmode='group',
        bargap=0.1,
        annotations=annotations,
        shapes=[mean_line])
    plotly.offline.iplot(figure)
    return figure