예제 #1
0
def get_chronotrack_results(events=pd.DataFrame({
    "event_name": [],
    "chronotrack_id": [],
    "date": []}),
    race_overrides=pd.DataFrame({
        "race_name": ['2K Para Nordic Cup', '5K Hauska Heikki Ski'],
        "discipline": ['sitski', 'freestyle'],
        "distance": [2.0, 5.0]
    })
):
    """Collect ChronoTrack results for every event into one normalised frame.

    For each row of ``events`` the results are scraped by ``chronotrack_id``,
    distance and discipline are derived from each ``race_name``, the event's
    name and date are stamped on, times are parsed into ``duration`` and
    placements are attached per event.

    Args:
        events: One row per event with ``event_name``, ``chronotrack_id`` and
            ``date`` columns. The default is an empty frame. The default
            frames are built once at definition time; this function only
            reads them.
        race_overrides: Per-``race_name`` fallback ``discipline`` and
            ``distance`` values, used where the derived value is null.

    Returns:
        A DataFrame with the columns ``overall_place``, ``gender_place``,
        ``name``, ``location``, ``time``, ``gender``, ``distance``,
        ``discipline``, ``event_name``, ``date`` and ``age``. When no event
        produced any results, an empty frame with those columns is returned.
    """
    output_columns = ['overall_place', 'gender_place', 'name', 'location', 'time', 'gender', 'distance',
                      'discipline', 'event_name', 'date', 'age']

    total_results = []
    for _, event in events.iterrows():
        results = scrape_chronotrack_results(event.chronotrack_id)
        results['distance'] = [extract_distance_from_race_name(n) for n in results.race_name]
        results['discipline'] = [extract_discipline_from_race_name(n) for n in results.race_name]
        results['event_name'] = event.event_name
        results['date'] = event.date

        results['duration'] = [parse_time_millis(t) for t in results.time]
        results = attach_placements(results)

        total_results.append(results)

    if not total_results:
        # pd.concat raises ValueError on an empty list, which is exactly what
        # the default (empty) ``events`` frame used to trigger.
        return pd.DataFrame(columns=output_columns)

    results_df = pd.concat(total_results)
    # rows whose discipline could not be derived from the race name are dropped
    results_df = results_df[~pd.isnull(results_df.discipline)]
    results_with_overrides = results_df.merge(race_overrides, how='left', on=['race_name'],
                                              suffixes=('', '_override'))
    results_with_overrides['distance'] = np.where(pd.isnull(results_with_overrides.distance),
                                                  results_with_overrides.distance_override,
                                                  results_with_overrides.distance)
    # NOTE(review): null disciplines were already filtered out above, so this
    # fallback can never pick the override value. Confirm whether the filter
    # was meant to run after the overrides are applied.
    results_with_overrides['discipline'] = np.where(pd.isnull(results_with_overrides.discipline),
                                                    results_with_overrides.discipline_override,
                                                    results_with_overrides.discipline)

    return results_with_overrides[output_columns]
예제 #2
0
def get_myraceresults_results(events=pd.DataFrame({
    "event_name": [],
    "url": [],
    "date": []})):
    """Collect my.raceresult.com results for every event into one frame.

    For each row of ``events`` the races behind ``url`` are listed through
    ``mrrs.get_mrr_races`` and each race's results are fetched with
    ``mrrs.get_mrr_results``. Rows without a detectable discipline or without
    a parseable time are dropped, then placements are attached per race.

    Args:
        events: One row per event with ``event_name``, ``url`` and ``date``
            columns. The default is an empty frame.

    Returns:
        A DataFrame with the columns ``overall_place``, ``gender_place``,
        ``name``, ``location``, ``time``, ``gender``, ``distance``,
        ``discipline``, ``event_name`` and ``date``. When no race produced a
        frame, an empty frame with those columns is returned.
    """
    output_columns = ['overall_place', 'gender_place', 'name', 'location', 'time', 'gender', 'distance',
                      'discipline', 'event_name', 'date']

    total_results = []
    for _, event in events.iterrows():
        races = mrrs.get_mrr_races(event.url)
        for contest_number, list_name, race_name, event_id, event_key in races:
            # the second element of the returned pair is not needed here
            results, _ = mrrs.get_mrr_results(event_id, event_key, list_name, contest_number,
                                              race_name=race_name)
            results_df = pd.DataFrame(results, columns=['name', 'location', 'age_group', 'time', 'race_name'])
            # age groups starting with 'M' are labelled male, everything else female
            results_df['gender'] = np.where(results_df.age_group.str.startswith('M'), 'male', 'female')
            results_df['discipline'] = [extract_discipline_from_race_name(n) for n in results_df.race_name]
            # ignore undetectable disciplines and non-ski disciplines;
            # .copy() so later column assignments do not write into a slice
            results_df = results_df[~pd.isnull(results_df.discipline)].copy()
            results_df['distance'] = [extract_distance_from_race_name(n) for n in results_df.race_name]
            results_df['duration'] = [parse_time_millis(t) for t in results_df.time]
            # ignore DNFs and borked time formats
            results_df = results_df[~pd.isnull(results_df.duration)].copy()
            results_df['date'] = event.date
            results_df['event_name'] = event.event_name
            results_df = attach_placements(results_df)

            total_results.append(results_df)

    if not total_results:
        # pd.concat raises ValueError on an empty list, which is exactly what
        # the default (empty) ``events`` frame used to trigger.
        return pd.DataFrame(columns=output_columns)

    return pd.concat(total_results)[output_columns]
예제 #3
0
def get_myraceresults_results(events=pd.DataFrame({
    "event_name": ['Vasaloppet USA', 'Noquemanon Ski Marathon', 'Pepsi Challenge', 'Great Bear Chase'],
    "url": ['https://my3.raceresult.com/117060/', 'https://my5.raceresult.com/115565/',
                  'https://my2.raceresult.com/118903/', 'https://my1.raceresult.com/118905/'],
    "date": ['2019-02-09', '2019-01-26', '2019-03-02', '2019-03-09']})):
    """Collect my.raceresult.com results for every event into one frame.

    For each row of ``events`` the races behind ``url`` are listed through
    ``mrrs.get_mrr_races`` and each race's results are fetched with
    ``mrrs.get_mrr_results``. Rows without a detectable discipline or without
    a parseable time are dropped, then placements are attached per race.

    Args:
        events: One row per event with ``event_name``, ``url`` and ``date``
            columns. The default lists four 2019 events.

    Returns:
        A DataFrame with the columns ``overall_place``, ``gender_place``,
        ``name``, ``location``, ``time``, ``gender``, ``distance``,
        ``discipline``, ``event_name`` and ``date``. When no race produced a
        frame (for example when ``events`` is empty), an empty frame with
        those columns is returned.
    """
    output_columns = ['overall_place', 'gender_place', 'name', 'location', 'time', 'gender', 'distance',
                      'discipline', 'event_name', 'date']

    total_results = []
    for _, event in events.iterrows():
        races = mrrs.get_mrr_races(event.url)
        for contest_number, list_name, race_name, event_id, event_key in races:
            # the second element of the returned pair is not needed here
            results, _ = mrrs.get_mrr_results(event_id, event_key, list_name, contest_number,
                                              race_name=race_name)
            results_df = pd.DataFrame(results, columns=['name', 'location', 'age_group', 'time', 'race_name'])
            # age groups starting with 'M' are labelled male, everything else female
            results_df['gender'] = np.where(results_df.age_group.str.startswith('M'), 'male', 'female')
            results_df['discipline'] = [extract_discipline_from_race_name(n) for n in results_df.race_name]
            # ignore undetectable disciplines and non-ski disciplines;
            # .copy() so later column assignments do not write into a slice
            results_df = results_df[~pd.isnull(results_df.discipline)].copy()
            results_df['distance'] = [extract_distance_from_race_name(n) for n in results_df.race_name]
            results_df['duration'] = [parse_time_millis(t) for t in results_df.time]
            # ignore DNFs and borked time formats
            results_df = results_df[~pd.isnull(results_df.duration)].copy()
            results_df['date'] = event.date
            results_df['event_name'] = event.event_name
            results_df = attach_placements(results_df)

            total_results.append(results_df)

    if not total_results:
        # pd.concat raises ValueError on an empty list (no events or no races)
        return pd.DataFrame(columns=output_columns)

    return pd.concat(total_results)[output_columns]
예제 #4
0
def get_gopher_state_results(event_names_to_distance = pd.DataFrame({'event_name': ['Turtle River Pursuit', 'Big Island and Back'],
                                           'distance': [11, 7.5]})):
    """Build the 2019 Gopher State results frame with placements and distances.

    Events whose second element contains '2019' are fetched through ``gss``,
    bike and team races are removed, rows without a detectable discipline are
    dropped, and the remaining rows are inner-joined to
    ``event_names_to_distance`` on ``event_name``. Events missing from that
    mapping are therefore absent from the output.

    Args:
        event_names_to_distance: Frame mapping ``event_name`` to ``distance``.

    Returns:
        A DataFrame with the columns ``overall_place``, ``gender_place``,
        ``name``, ``location``, ``time``, ``gender``, ``distance``,
        ``discipline``, ``event_name`` and ``date``.
    """
    season_events = [event for event in gss.get_events() if '2019' in event[1]]
    season_races = gss.get_races_from_events(season_events)
    ski_results = gss.get_results_from_races(season_races)

    # remove bike and team results
    is_bike = ski_results.race_name.str.contains('Bike')
    is_team = ski_results.race_name.str.contains('Tm')
    ski_results = ski_results[~is_bike & ~is_team]

    # the date is read from the raw event name, before that column is overwritten below
    ski_results['date'] = list(map(gss.extract_date_from_race_name, ski_results.event_name))
    ski_results['discipline'] = list(map(extract_discipline_from_race_name, ski_results.race_name))
    has_discipline = ~pd.isnull(ski_results.discipline)
    ski_results = ski_results[has_discipline]

    ski_results['event_name'] = list(map(gss.extract_event, ski_results.event_name))
    ski_results['name'] = ski_results.first_name + " " + ski_results.last_name
    # anything other than 'M' is labelled female
    ski_results['gender'] = np.where(ski_results.gender == 'M', 'male', 'female')
    ski_results['duration'] = list(map(parse_time_millis, ski_results.time))
    ski_results['location'] = None

    placed = attach_placements(ski_results)
    with_distance = placed.merge(event_names_to_distance, how="inner", on=['event_name'])

    output_columns = ['overall_place', 'gender_place', 'name', 'location', 'time', 'gender', 'distance',
                      'discipline', 'event_name', 'date']
    return with_distance[output_columns]
예제 #5
0
]]
# Drop rows without a name. Spot checks showed these are malformed names
# (e.g. only a last name or only a first name).
all_results = all_results[~pd.isnull(all_results.name)]
# A handful of rows still have misaligned columns - gross, but not fixed at the source.
# Where the duration string is shorter than 7 characters, take the value from
# the age column as the duration instead.
all_results['duration'] = np.where(all_results.duration.str.len() < 7,
                                   all_results.age, all_results.duration)
# A few more rows have borked ages: keep age strings of at most 3 characters,
# otherwise store None.
all_results['age'] = np.where(all_results.age.str.len() <= 3, all_results.age,
                              None)
# Write the cleaned frame to pdf_birkie.csv under STORAGE_DIRECTORY.
all_results.to_csv(STORAGE_DIRECTORY + 'pdf_birkie.csv')

##########################
# With results (mostly) parsed out, we need to derive a gender-agnostic overall place.
##########################
# Keep the raw duration string in `time`, then replace `duration` with the
# output of parse_time_millis for each row.
all_results['time'] = all_results.duration
all_results['duration'] = [parse_time_millis(t) for t in all_results.duration]


def attach_placements(results):
    """Return ``results`` sorted by ``duration`` with placement columns added.

    ``gender_place`` is the 1-based rank within each (date, discipline,
    gender) group and ``overall_place`` the 1-based rank within each
    (date, discipline) group, both in ascending ``duration`` order. The input
    frame is not modified; a sorted copy is returned.
    """
    ranked = results.sort_values('duration')

    placement_groups = (
        ('gender_place', ['date', 'discipline', 'gender']),
        ('overall_place', ['date', 'discipline']),
    )
    for place_column, group_keys in placement_groups:
        # cumcount is zero-based, so shift by one to get a 1-based place
        ranked[place_column] = ranked.groupby(group_keys).cumcount() + 1

    return ranked

예제 #6
0
##############################
# start control flow
##############################

# Stack the result frames returned by every provider-specific loader into one table.
results = pd.concat([
    get_gopher_state_results(),
    get_itiming_results(),
    get_chronotrack_results(),
    get_mtec_vasa_results(),
    get_mtec_mob_results(),
    get_mrr_results(),
    get_orr_results(),
])

# Drop rows with no time at all, then rows whose time cannot be parsed.
results = results[~pd.isnull(results.time)]
results['time_parsed'] = [parse_time_millis(t) for t in results.time]
# note, this knocks off a handful of labelled DNFs & DQs
results = results[~pd.isnull(results.time_parsed)]

results['event_name_enumeration'] = [
    enumerate_event_name(name) for name in results.event_name
]
results['date'] = pd.to_datetime(results.date)
# Coerce to numeric *before* scaling down by 1000: dividing first made the
# to_numeric call a no-op and raised on string or None distances.
results['distance'] = pd.to_numeric(results.distance) / 1000

# NOTE(review): the attach_placements visible in this file sorts on a
# `duration` column - confirm the concatenated frame carries one at this point.
results = attach_placements(results)

con = None
try:
    con = get_connection()
    cursor = con.cursor()