コード例 #1
0
def generate_time_distribution_by_bib_numbers(data, performance_criteria):
    '''
    This function builds one BIB number/performance scatter figure per performance criterion, with one series per running of Lausanne Marathon.
    Returned Dict has the following pattern:
    {
        <performance_criterion_1>: <Plotly figure>
        [, <performance_criterion_2>: <Plotly figure>
        , ...]
    }

    Parameters
        - data: DataFrame containing records about runners
        - performance_criteria: Array containing performance criteria; each one is lowercased to obtain the column used for the y axis.
                    'time' (case-insensitive) gets the time layout, any other criterion gets the speed layout.

    Return
        - figures: Dict containing one figure per performance criterion (keys are the criteria as provided)
    '''

    # Series naming and coloring, keyed by the value found in the 'distance (km)' column
    names_by_distance = {10: '10 km', 21: 'Semi-marathon', 42: 'Marathon'}
    colors_by_distance = {10: KM_10_COLOR, 21: KM_21_COLOR, 42: KM_42_COLOR}

    # Layout options shared by every figure, then specialised for time and for speed
    shared_layout = {
        'title': 'Distribution of performance according to BIB numbers over the years',
        'x_name': 'BIB numbers',
        'x_format': 'f',
        'hovermode': 'closest',
    }
    time_layout = dict({'y_name': 'Time', 'y_type': 'date', 'y_format': '%H:%M'}, **shared_layout)
    speed_layout = dict({'y_name': 'Speed (m/s)'}, **shared_layout)

    def build_figure(criterion_label):
        # Column names are lowercase versions of the criteria labels
        column = criterion_label.lower()
        traces = study_utils.create_plotly_scatters(
            data=data,
            x='number',
            y=column,
            hue='distance (km)',
            hue_names=names_by_distance,
            text='name',
            color=colors_by_distance,
            use_hue_names=False)
        layout = time_layout if column == 'time' else speed_layout
        return study_utils.create_plotly_legends_and_layout(data=traces, **layout)

    return {label: build_figure(label) for label in performance_criteria}
コード例 #2
0
def generate_all_bib_performance_figure(df):
    '''
    This function builds and displays the BIB number/time scatter figure, with one series per year of Lausanne Marathon (1999 to 2016).
    Only the series of years after 2015 are visible at first; the other ones are set to 'legendonly'.

    Parameters
        - df: DataFrame containing records about runners

    Return
        - figure: Plotly figure
    '''

    # Considered years (upper bound excluded), with their display names, colors and initial visibility
    years_range = range(1999, 2017)
    names_by_year = {}
    visibility_by_name = {}
    for year in years_range:
        label = str(year)
        names_by_year[year] = label
        visibility_by_name[label] = 'legendonly' if year <= 2015 else True
    palette = study_utils.generate_colors_palette(data=years_range, isDict=False, forceString=True)

    # Layout options of the figure
    layout = {
        'y_name': 'Time',
        'y_type': 'date',
        'y_format': '%H:%M',
        'title': 'Distribution of performance according to BIB numbers over the years',
        'x_name': 'BIB numbers',
        'hovermode': 'closest',
    }

    traces = study_utils.create_plotly_scatters(
        data=df,
        x='number',
        y='time',
        hue='year',
        hue_names=names_by_year,
        text='name',
        color=palette,
        visibility=visibility_by_name)
    figure = study_utils.create_plotly_legends_and_layout(data=traces, **layout)
    plotly.offline.iplot(figure)
    return figure
コード例 #3
0
def plot_median_age_evolution(data, x=None, y='Median age (all runnings)', title='Evolution of median age over the years', groupby_column='Gender', groupby_attributes=None):
    '''
    This function displays a graph with one line per group of groupby_column (by default, one line per gender), showing the evolution of column y.

    Parameters
        - data: DataFrame containing data to use for graph
        - x: Name of the column to use for x axis (by default None / if None, index will be used)
        - y: Name of the column to use for y axis (by default, 'Median age (all runnings)')
        - title: Title of the graph (by default, 'Evolution of median age over the years')
        - groupby_column: Name of the column to use for grouping data (by default, 'Gender')
        - groupby_attributes: Dictionary containing options for each unique value in column groupby_column (at present, 'color' and 'name' are supported / by default, None)
                    A group without entry in this dictionary uses its key as name and no explicit color.

    Return
        - figure: Plotly figure
    '''

    attributes_by_group = groupby_attributes or {}
    lines = []

    # We group by the bare column name (not by a one-element list): with a list, recent versions of pandas
    # yield 1-tuples as keys, which would neither match the keys of groupby_attributes nor be valid trace names.
    for key, group in data.groupby(groupby_column):
        attributes = attributes_by_group.get(key, {})
        x_values = group[x] if x else group.index
        line = go.Scatter(x=x_values, y=group[y], mode='lines', name=attributes.get('name', key), marker={'color': attributes.get('color', None)})
        lines.append(line)

    figure = study_utils.create_plotly_legends_and_layout(lines, title=title, x_name=(x if x else data.index.name), y_name=y)
    plotly.offline.iplot(figure)
    return figure
コード例 #4
0
def plot_age_distribution(df, age_column_name='age', sex_column_name='sex'):
    '''
    This function displays the histogram of ages of runners, with a dashed vertical line at the mean age and an annotation giving mean and standard deviation of age for all runners and for each gender.

    Parameters:
        - df: DataFrame containing information about runners
        - age_column_name: Name of column containing age of runners (by default, 'age')
        - sex_column_name: Name of column containing gender of runners (by default, 'sex')

    Return
        - figure: Plotly figure
    '''

    def ages_of(label):
        # Label 'all' stands for the whole population, any other label selects a single gender
        if label == 'all':
            return df[age_column_name]
        return df[df[sex_column_name] == label][age_column_name]

    def summary_of(label):
        selected_ages = ages_of(label)
        return ('<b>Mean age of ' + label + ' runners: ' +
                str(round(np.mean(selected_ages), 2)) + ' (STD: ' +
                str(round(np.std(selected_ages), 2)) + ')</b>')

    # One line of statistics for all runners, then one per gender found in the data
    labels = ['all'] + list(df[sex_column_name].unique())
    statistics = [summary_of(label) for label in labels]

    data = [go.Histogram(x=df[age_column_name])]
    annotations = [
        Annotation(y=1,
                   x=1,
                   text='<br>'.join(statistics),
                   xref='paper',
                   yref='paper',
                   showarrow=False)
    ]

    # Vertical line (whole height of the plotting area) located at the mean age
    mean_age = np.mean(df[age_column_name])
    mean_line = {
        'type': 'line',
        'yref': 'paper',
        'x0': mean_age,
        'y0': 0,
        'x1': mean_age,
        'y1': 1,
        'line': {'color': '#f44242', 'width': 2, 'dash': 'dash'}
    }

    figure = study_utils.create_plotly_legends_and_layout(
        data,
        title='Age distribution of runners',
        x_name='Age',
        y_name='Number of runners',
        barmode='group',
        bargap=0.25,
        annotations=annotations,
        shapes=[mean_line])
    plotly.offline.iplot(figure)
    return figure
コード例 #5
0
def plot_age_evolution_boxplots(df, title='Evolution of age of runners over the years', year_column='year', age_column='age'):
    '''
    This function plots evolution of age of runners over the years (using boxplots).

    Parameters
        - df: DataFrame containing records about runners
        - title: Title of the graph (by default, 'Evolution of age of runners over the years')
        - year_column: Name of column containing years of event (by default, 'year'); used for x axis and, capitalized, as x axis name
        - age_column: Name of column containing age of runners (by default, 'age'); used for y axis and, capitalized, as y axis name

    Return
        - figure: Plotly figure
    '''

    options = {'title': title, 'x_name': year_column.capitalize(), 'y_name': age_column.capitalize()}
    # x axis must follow year_column (and not a hard-coded 'year'), otherwise the parameter only affects the axis label
    boxplots = study_utils.create_plotly_boxplots(data=df, x=year_column, y=age_column)
    figure = study_utils.create_plotly_legends_and_layout(data=boxplots, **options)
    plotly.offline.iplot(figure)
    return figure
コード例 #6
0
def plot_distribution_over_years(data, title='Distribution of runners over the years for Lausanne Marathon'):
    '''
    This function displays a stacked bar chart representing distribution of runners over the years, given data.
    Index of data provides x values and each column of data provides one series of bars.

    Parameters
        - data: DataFrame containing distribution of runners over the years (columns must be among '10 km', 'Semi-marathon' and 'Marathon')
        - title: Title of the graph (by default, 'Distribution of runners over the years for Lausanne Marathon')

    Return
        - figure: Plotly figure
    '''

    color_by_running = {'10 km': KM_10_COLOR, 'Semi-marathon': KM_21_COLOR, 'Marathon': KM_42_COLOR}

    # One series of bars per column of data
    bars = [
        go.Bar(x=list(data.index),
               y=data[running],
               name=running,
               marker={'color': color_by_running[running]})
        for running in data.columns
    ]

    figure = study_utils.create_plotly_legends_and_layout(bars, title=title, x_name='Years', y_name='Number of runners', barmode='stack')
    plotly.offline.iplot(figure)
    return figure
コード例 #7
0
def plot_distribution_between_types_of_participants(df,
                                                    type_column_name='type'):
    '''
    This function displays the distribution of runners between types of participants (one bar per type).
    Hover text of each bar gives the number of participants of the type and its share among all participants.

    Parameters
        - df: DataFrame containing information about runners
        - type_column_name: Name of column containing type of runners (by default, 'type')

    Return
        - figure: Plotly figure
    '''

    nb_total_participants = len(df)
    x_values, y_values, texts = [], [], []
    for type_participants in df[type_column_name].unique():
        nb_participants = len(df[df[type_column_name] == type_participants])
        x_values.append(type_participants)
        y_values.append(nb_participants)
        texts.append(
            '<b>{}</b><br>Number of participants: {} ({:.1f}%)'.format(
                type_participants.capitalize(), nb_participants,
                nb_participants * 100 / nb_total_participants))

    # The single trace covers every type of participants, so it gets a fixed name.
    # (Using the loop variable here would name the trace after the last type only and fail on an empty DataFrame.)
    bar = go.Bar(x=x_values,
                 y=y_values,
                 name='Participants',
                 text=texts,
                 hoverinfo='text')

    figure = study_utils.create_plotly_legends_and_layout(
        [bar],
        title='Distribution by type of runners',
        x_name='Type of runner',
        y_name='Number of runners',
        barmode='group')
    plotly.offline.iplot(figure)
    return figure
コード例 #8
0
def generate_all_performance_figures(df, age_categories, sex_categories, performance_criteria):
    '''
    This function builds performance boxplot figures (per year, one box per running) for every combination of age category, sex category and performance criterion.
    Returned Dict has the following pattern:
    {
        <age_category_1>: {
            <sex_1>: {
                <performance_criterion_1>: <Plotly figure>
                [, <performance_criterion_2>: <Plotly figure>
                , ...]
            }
            [, <sex_2>: {
                <performance_criterion_1>: <Plotly figure>
                [, <performance_criterion_2>: <Plotly figure>
                , ...]
            }, ...]
        }
        [, <age_category_2>: {
            ...
        }, ...]
    }

    Parameters
        - df: DataFrame containing records about runners
        - age_categories: Array containing age categories to be displayed (if 'All'/'all', no filter is done on df)
        - sex_categories: Array containing sex categories to be displayed (if 'All'/'all', no filter is done on df; otherwise the lowercased value is matched against column 'sex')
        - performance_criteria: Array containing performance criteria to consider ('Time' and 'Speed (m/s)', case-insensitive)

    Return
        - figures: Dict containing all performance figures

    Raise
        - ValueError: if a performance criterion is neither 'time' nor 'speed (m/s)' once lowercased
    '''

    # Considered runnings, years found in data (falsy values are discarded) and colors for boxplots
    runnings = {10: '10 km', 21: 'Semi-marathon', 42: 'Marathon'}
    year_values = [year for year in df['year'].unique() if year]
    colors = {'10 km': KM_10_COLOR, 'Semi-marathon': KM_21_COLOR, 'Marathon': KM_42_COLOR}

    # Layout options, indexed by lowercased performance criterion
    shared_layout = {'title': 'Performance over years for runnings of Lausanne Marathon', 'x_name': 'Years', 'x_values': year_values, 'boxmode': 'group'}
    layout_by_criterion = {
        'time': dict({'y_name': 'Time', 'y_type': 'date', 'y_format': '%H:%M:%S'}, **shared_layout),
        'speed (m/s)': dict({'y_name': 'Speed (m/s)'}, **shared_layout),
    }

    figures = {}

    for age_category in age_categories:
        # Selection according to age category
        by_age = df if age_category.lower() == 'all' else df[df['age category'] == age_category]
        figures[age_category] = {}

        for sex_category in sex_categories:
            # Selection according to sex category
            by_age_and_sex = by_age if sex_category.lower() == 'all' else by_age[by_age['sex'] == sex_category.lower()]
            figures_by_criterion = figures[age_category][sex_category] = {}

            annotations = [Annotation(y=1.1, text='Age category: ' + age_category + '    Sex category: ' + sex_category + ' runners', xref='paper', yref='paper', showarrow=False)]

            # One figure per performance criterion
            for performance_criterion in performance_criteria:
                criterion = performance_criterion.lower()
                boxplots = study_utils.create_plotly_boxplots(data=by_age_and_sex, x='year', y=criterion, hue='distance (km)', hue_names=runnings, colors=colors)
                layout = layout_by_criterion.get(criterion)
                if layout is None:
                    # Only two criteria are allowed: 'time' and 'speed (m/s)'
                    raise ValueError("Invalid performance criterion encountered. Performance criterion must be either 'Time' or 'Speed (m/s)'")
                figures_by_criterion[performance_criterion] = study_utils.create_plotly_legends_and_layout(data=boxplots, annotations=annotations, **layout)

    return figures
コード例 #9
0
def generate_all_evolution_figures(df, age_categories, sex_categories):
    '''
    This function builds, for every combination of age category and sex category, a figure showing the number of participants per year, with one line per running.
    Returned Dict has the following pattern:
    {
        <age_category_1>: {
            <sex_1>: <Plotly figure>
            [, <sex_2>: <Plotly figure>, ...]
        }
        [, <age_category_2>: {
            <sex_1>: <Plotly figure>
            [, <sex_2>: <Plotly figure>, ...]
        }, ...]
    }

    Parameters
        - df: DataFrame containing records about runners
        - age_categories: Array containing age categories to be displayed (if 'All'/'all', no filter is done on df)
        - sex_categories: Array containing sex categories to be displayed (if 'All'/'all', no filter is done on df; otherwise the lowercased value is matched against column 'sex')

    Return
        - figures: Dict containing all evolution figures
    '''

    # Considered runnings (in display order) and column holding the distance
    distance_column = 'distance (km)'
    runnings_attributes = OrderedDict()
    runnings_attributes[10] = {'name': '10 km', 'color': KM_10_COLOR}
    runnings_attributes[21] = {'name': 'Semi-marathon', 'color': KM_21_COLOR}
    runnings_attributes[42] = {'name': 'Marathon', 'color': KM_42_COLOR}

    # Years found in data (falsy values are discarded)
    year_values = [year for year in df['year'].unique() if year]

    layout = {'title': 'Evolution of number of participants over years for runnings of Lausanne Marathon', 'x_name': 'Years', 'x_values': year_values, 'y_format': 'f'}

    def count_per_year(records):
        # Number of records for each considered year, in the order of year_values
        return [len(records[records['year'] == year]) for year in year_values]

    figures = {}

    for age_category in age_categories:
        # Selection according to age category
        by_age = df if age_category.lower() == 'all' else df[df['age category'] == age_category]
        figures[age_category] = {}

        for sex_category in sex_categories:
            # Selection according to sex category
            by_age_and_sex = by_age if sex_category.lower() == 'all' else by_age[by_age['sex'] == sex_category.lower()]

            # One line per running
            lines = []
            for km, attributes in runnings_attributes.items():
                records_of_running = by_age_and_sex[by_age_and_sex[distance_column] == km]
                lines.append(go.Scatter(x=year_values, y=count_per_year(records_of_running), mode='lines', name=attributes['name'], marker={'color': attributes['color']}))

            annotations = [Annotation(y=1.1, text='Age category: ' + age_category + '    Sex category: ' + sex_category + ' runners', xref='paper', yref='paper', showarrow=False)]

            figures[age_category][sex_category] = study_utils.create_plotly_legends_and_layout(data=lines, annotations=annotations, **layout)

    return figures
コード例 #10
0
def generate_teams_evolution_figures(data, title='Evolution of teams performance over the years', runnings=None, team_column_name='team', year_column_name='year', min_years=6, nb_teams=8, threshold_bins_size=50, display_annotations=True):
    '''
    This function generates teams evolution figures for all runnings.
    Each figure contains one line per selected team, with one marker per year of participation (size of marker follows the number of participants of the team for this year).
    Returned Dict has the following pattern:
    {
        <running_1>: <Plotly figure>
        [, <running_2>: <Plotly figure>, ...]
    }

    Parameters
        - data: DataFrame containing results
        - title: Title of figure (by default, 'Evolution of teams performance over the years')
        - runnings: Dict containing name of column containing runnings (key: column_name) and set of runnings (key: values, value: dict() with key: value in column, value: name of running)
                    By default, None. If None, default values will be set by function.
        - team_column_name: Name of column containing teams (by default, 'team')
        - year_column_name: Name of column containing year associated to a given result (by default, 'year')
        - min_years: Minimum number of distinct years of participation when considering a team (by default, 6)
        - nb_teams: Number of teams to consider among teams with number of participations >= min_years (by default, 8)
                    Note: Teams are ranked by number of results (largest first).
        - threshold_bins_size: Maximum size of a marker (by default, 50)
                    Note: Size of marker is the number of participants of a considered team for a given year. If None (or 0), no limitation is used.
        - display_annotations: Boolean used to display annotations (by default, True)

    Return
        - figures: Dict containing all teams evolution figures (keys are names of runnings)
    '''

    # Default runnings
    if not runnings:
        runnings = {'column_name': 'distance (km)', 'values': {10: '10 km', 21: 'Semi-marathon', 42: 'Marathon'}}

    figures = {}

    # Loop over runnings
    for key, value in runnings['values'].items():
        # We retrieve data related to current running
        filtered_data = data[data[runnings['column_name']] == key]
        # We keep teams with at least <min_years> distinct years of participation...
        regular_teams = filtered_data.groupby(team_column_name).filter(lambda team: team[year_column_name].nunique() >= min_years)
        # ... and we retrieve the <nb_teams> largest ones. Grouping must use team_column_name here as well,
        # otherwise a custom team column is silently ignored (or the call fails if there is no 'team' column).
        top_teams = regular_teams.groupby(team_column_name).size().sort_values(ascending=False).nlargest(nb_teams)
        # We keep only data linked with such teams
        data_top_teams = filtered_data[filtered_data[team_column_name].isin(top_teams.index.values)]
        # We finally groupby teams after complete filter
        groups_top_teams = data_top_teams.groupby(team_column_name)

        # We generate colors for each team and we initialize array that will contain traces
        colors = study_utils.generate_colors_palette(groups_top_teams.groups)
        traces = []

        # Loop over teams
        for name, groups in groups_top_teams:
            x_values, y_values, size_values, texts = [], [], [], []
            # Loop over participation years for current team
            for year, results in groups.groupby(year_column_name):
                nb_participants = len(results)
                y = study_utils.compute_average_time(results)
                x_values.append(year)
                y_values.append(y)
                texts.append('<b>Team: ' + name + '</b><br>Average time: ' + y.strftime('%H:%M:%S') + '<br>Participants: ' + str(nb_participants) + '<br>Median age: ' + str(int(results['age'].median())))
                # Size of marker is the number of participants, capped by threshold_bins_size when provided
                size_values.append(min(nb_participants, threshold_bins_size) if threshold_bins_size else nb_participants)
            trace = go.Scatter(x=x_values, y=y_values, name=name, mode='lines+markers', hoverinfo='text', text=texts, marker=dict(size=size_values, color=colors[name], line=dict(width=1.5, color='rgb(0, 0, 0)')))
            traces.append(trace)

        # For each running, we create annotations if asked by user, we set multiple options accordingly and we store figure
        if display_annotations:
            annotations = [Annotation(y=1.1, text='Running: ' + str(value) + ' | Top teams: ' + str(nb_teams) + ' | Minimum participations: ' + str(min_years) + ' | Maximum bins size: ' + str(threshold_bins_size), xref='paper', yref='paper', showarrow=False)]
        else:
            annotations = None
        # NOTE(review): y axis is named 'Median time' while values come from compute_average_time - confirm which label is intended
        options = {'title': title, 'hovermode': 'closest', 'x_name': 'Year', 'y_name': 'Median time', 'y_type': 'time', 'y_format': '%H:%M:%S', 'annotations': annotations}
        figure = study_utils.create_plotly_legends_and_layout(data=traces, **options)
        figures[value] = figure

    return figures
コード例 #11
0
def plot_runners_teams_individual_distribution_according_to_running_type(
        df,
        title='Team/individual runners composition',
        runnings=None,
        team_column_name='profile'):
    '''
    This function displays, for each running, the number of participants per profile (individual runners/runners in team) as grouped bars.
    Hover text of each bar gives the share of the profile within the running, and an annotation gives the total number of runners per running.

    Parameters
        - df: DataFrame containing data
        - title: Title of the graph (by default, 'Team/individual runners composition')
        - runnings: Dict containing name of column containing runnings (key: column_name) and set of runnings (key: values, value: dict() with following keys: name, color)
                    By default, None. If None, default values will be set by function.
        - team_column_name: Name of column containing type of participants (by default, 'profile')

    Return
        - figure: Plotly figure
    '''

    if not runnings:
        # Default runnings, kept in display order
        default_values = OrderedDict()
        default_values[10] = {'name': '10 km', 'color': KM_10_COLOR}
        default_values[21] = {'name': 'Semi-marathon', 'color': KM_21_COLOR}
        default_values[42] = {'name': 'Marathon', 'color': KM_42_COLOR}
        runnings = {'column_name': 'distance (km)', 'values': default_values}

    bars = []
    totals_per_running = []

    for distance, attributes in runnings['values'].items():
        runners_of_running = df[df[runnings['column_name']] == distance]
        nb_runners_running = len(runners_of_running)
        running_name = attributes['name']

        # Number of runners for each profile found in current running
        profiles = list(runners_of_running[team_column_name].unique())
        counts = [
            len(runners_of_running[runners_of_running[team_column_name] == profile])
            for profile in profiles
        ]
        hover_texts = [
            '<b>' + running_name + '</b><br>' + profile.capitalize() +
            ' runners: ' + str(count) + ' (' +
            '{:.1f}%'.format(count * 100 / nb_runners_running) + ')'
            for profile, count in zip(profiles, counts)
        ]

        totals_per_running.append(running_name + ': ' +
                                  str(nb_runners_running) + ' runners')
        bars.append(
            go.Bar(x=profiles,
                   y=counts,
                   name=running_name,
                   text=hover_texts,
                   hoverinfo='text',
                   marker={'color': attributes['color']}))

    annotations = [
        Annotation(y=1.1,
                   x=0,
                   text=' | '.join(totals_per_running),
                   xref='paper',
                   yref='paper',
                   showarrow=False)
    ]
    figure = study_utils.create_plotly_legends_and_layout(
        bars,
        title=title,
        x_name='Composition',
        y_name='Number of runners',
        barmode='group',
        annotations=annotations)
    plotly.offline.iplot(figure)
    return figure
コード例 #12
0
def plot_gender_distributions(df):
    '''
    This function displays grouped bars comparing the gender distribution (in percent) of Canton of Vaud with the one of the runners given in df.
    Hover texts give absolute numbers, and an annotation gives total number of residents and of runners.

    Parameters
        - df: DataFrame containing information on runners for Lausanne Marathon 2016 (column 'sex' with values 'male'/'female' is used)

    Return
        - figure: Plotly figure
    '''

    CANTON_VAUD = 'Canton of Vaud'
    LAUSANNE_MARATHON = 'Lausanne Marathon'

    def percentages(nb_male, nb_female, nb_total):
        # Share of each gender, in percent
        return pd.Series({
            'male': nb_male / nb_total * 100,
            'female': nb_female / nb_total * 100
        })

    def hover_texts(source, nb_male, nb_female, unit):
        # One text per gender, in the same order as the rows of the plotted DataFrame (male, then female)
        return [
            '<b>' + source + '</b><br>' + str(nb) + ' ' + unit
            for nb in (nb_male, nb_female)
        ]

    # Numbers of runners
    total_runners = len(df)
    total_runners_male = len(df[df['sex'] == 'male'])
    total_runners_female = len(df[df['sex'] == 'female'])

    # Building of DataFrame for plotting (rows sorted in descending order: male, female)
    information_population = pd.DataFrame({
        CANTON_VAUD:
        percentages(TOTAL_RESIDENT_MALE, TOTAL_RESIDENT_FEMALE,
                    TOTAL_RESIDENT_VAUD),
        LAUSANNE_MARATHON:
        percentages(total_runners_male, total_runners_female, total_runners)
    }).sort_index(ascending=False)

    genders = information_population.index.values
    data = [
        go.Bar(x=genders,
               y=information_population[CANTON_VAUD],
               name=CANTON_VAUD,
               hoverinfo='text',
               text=hover_texts(CANTON_VAUD, TOTAL_RESIDENT_MALE,
                                TOTAL_RESIDENT_FEMALE, 'residents')),
        go.Bar(x=genders,
               y=information_population[LAUSANNE_MARATHON],
               name=LAUSANNE_MARATHON,
               hoverinfo='text',
               text=hover_texts(LAUSANNE_MARATHON, total_runners_male,
                                total_runners_female, 'runners'))
    ]

    annotations = [
        Annotation(y=1.1,
                   text='Total residents: ' + str(TOTAL_RESIDENT_VAUD) +
                   ' | Total runners: ' + str(total_runners),
                   xref='paper',
                   yref='paper',
                   showarrow=False)
    ]
    figure = study_utils.create_plotly_legends_and_layout(
        data,
        title='Gender distribution Lausanne Marathon vs Canton of Vaud',
        x_name='Gender',
        y_name='Percentage (%)',
        barmode='group',
        annotations=annotations)
    plotly.offline.iplot(figure)
    return figure
コード例 #13
0
def plot_time_distribution_by_age(data, runnings=None, age_column_name='age'):
    '''
    This function generates, for each value of the age column, a figure with overlaid histograms of times (one histogram per running).
    Figures are returned and not displayed by this function.

    Parameters
        - data: DataFrame containing all the information of a Lausanne Marathon
        - runnings: Dict containing name of column containing runnings (key: column_name) and set of runnings (key: values, value: dict() with following keys: name, color)
                    By default, None. If None, default values will be set by function.
        - age_column_name: Name of the column containing age of participants ('age' or 'age category', by default, 'age')

    Return
        - figures: Dict containing one Plotly figure per value found in column age_column_name (keys are these values)
    '''

    if not runnings:
        runnings = {
            'column_name': 'distance (km)',
            'values': {
                10: {
                    'name': '10 km',
                    'color': KM_10_COLOR
                },
                21: {
                    'name': 'Semi-marathon',
                    'color': KM_21_COLOR
                },
                42: {
                    'name': 'Marathon',
                    'color': KM_42_COLOR
                }
            }
        }
    groups = data.groupby(age_column_name)

    figures = {}
    options = {
        'x_name': 'Performance time',
        'y_name': 'Number of runners',
        'x_type': 'date',
        'x_format': '%H:%M:%S',
        'barmode': 'overlay',
        'bargroupgap': 0.1
    }

    for name, group in groups:
        histograms, statistics = [], []
        # Bins bounds are shared by all runnings of the group, so they are computed once per group
        time_min = np.min(group['time'])
        time_max = np.max(group['time'])
        for km, attributes_running in runnings['values'].items():
            x = group[group[runnings['column_name']] == km]['time']
            statistics.append(attributes_running['name'] + ': ' + str(len(x)) +
                              ' runners')
            histograms.append(
                go.Histogram(x=x,
                             xbins={
                                 'start': time_min,
                                 'end': time_max,
                                 # Bins of 5 minutes (value given in milliseconds)
                                 'size': 5 * 60000
                             },
                             name=attributes_running['name'],
                             marker={'color': attributes_running['color']},
                             opacity=0.5))
        statistics.append('Total: ' + str(len(group)) + ' runners')
        annotations = [
            Annotation(y=1.1,
                       x=0.9,
                       text=' | '.join(statistics),
                       xref='paper',
                       yref='paper',
                       showarrow=False)
        ]
        # Group key is converted to string: with a numeric column (such as 'age'), concatenation with a string would fail
        figure = study_utils.create_plotly_legends_and_layout(
            data=histograms,
            title='Time distribution (' + str(name) + ')',
            **options,
            annotations=annotations)
        figures[name] = figure
    return figures
コード例 #14
0
def plot_time_difference_distribution(
        df,
        title='Time difference with the best runner in team',
        time_difference_column_name='time difference team'):
    '''
    This function displays the histogram of time differences between performance of team members and best performance within the team.
    A dashed vertical line is drawn at the mean difference, and an annotation gives mean and maximum differences.

    Parameters
        - df: DataFrame containing information on runners
        - title: Title of the graph (by default, 'Time difference with the best runner in team')
        - time_difference_column_name: Name of column containing time differences (by default, 'time difference team')
                    Values are passed to study_utils.convert_seconds_to_time, whose result is parsed with format '%H:%M:%S'.

    Return
        - figure: Plotly figure
    '''

    time_format = '%H:%M:%S'

    # Statistics displayed in annotation, and position of the vertical line (mean difference)
    mean_difference = np.mean(df[time_difference_column_name])
    mean_difference_dt = datetime.datetime.strptime(
        study_utils.convert_seconds_to_time(mean_difference), time_format)
    max_difference = np.max(df[time_difference_column_name])
    mean_text = 'Mean difference of time: ' + str(
        study_utils.convert_seconds_to_time(mean_difference))
    max_text = 'Maximum difference of time: ' + str(
        study_utils.convert_seconds_to_time(max_difference))

    # We work on a copy, in which time differences are converted to datetimes for the date axis
    data = df.copy()
    formatted_differences = [
        study_utils.convert_seconds_to_time(difference)
        for difference in data[time_difference_column_name]
    ]
    data[time_difference_column_name] = pd.to_datetime(formatted_differences,
                                                       format=time_format)
    differences = data[time_difference_column_name]

    # Bins of 5 minutes (size given in milliseconds), from smallest to largest difference
    bins = {
        'start': np.min(differences),
        'end': np.max(differences),
        'size': 5 * 60000
    }
    histogram = [go.Histogram(x=differences, xbins=bins)]

    annotations = [
        Annotation(y=1,
                   x=1,
                   text='<br>'.join([mean_text, max_text]),
                   align='right',
                   xref='paper',
                   yref='paper',
                   showarrow=False)
    ]
    mean_line = {
        'type': 'line',
        'yref': 'paper',
        'x0': mean_difference_dt,
        'y0': 0,
        'x1': mean_difference_dt,
        'y1': 1,
        'line': {'color': '#f44242', 'width': 2, 'dash': 'dash'}
    }
    figure = study_utils.create_plotly_legends_and_layout(
        histogram,
        title=title,
        x_name='Performance gap',
        x_type='date',
        x_format=time_format,
        y_name='Number of runners',
        barmode='group',
        bargap=0.1,
        annotations=annotations,
        shapes=[mean_line])
    plotly.offline.iplot(figure)
    return figure