Example #1
import urllib

import BeautifulSoup

# `team_id` and the `brf` helper module are defined elsewhere in this project.


def get_all_meets():
    """Scrape the team's TFRRS page and return the set of meet ids it links to."""
    print('Fetching a list of all meets from TFRRS')
    html = urllib.urlopen('http://www.tfrrs.org/teams/{}.html'.format(team_id))
    soup = BeautifulSoup.BeautifulSoup(html.read())
    rows = soup.findAll('td', {'class': 'meet'})

    meets = set()
    for r in rows:
        for link in r.findAll('a'):
            meet_id = brf.id_from_url(link['href'])
            if meet_id:
                meets.add(meet_id)
    return meets
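
This example and the next one rely on several module-level names that do not appear in the listings: `team_id`, `team_name`, a `heat_types` mapping, and a helper module `brf` with `id_from_url`, `clean_int`, and `format_mark`. The sketch below is only a guess at what those definitions might look like, included so the snippets can be read standalone; the project's real values and helper logic may differ.

# Hypothetical module-level context -- all names and behavior here are
# assumptions for illustration, not the project's actual definitions.
import re

team_id = 'example_college'      # placeholder TFRRS team page slug
team_name = 'Example College'    # placeholder team name as shown in result tables

# Placeholder labels: heat_type starts at 1 for the first round of an event
# and counts up for later rounds of the same event.
heat_types = {1: 'round_1', 2: 'round_2', 3: 'round_3'}


class brf(object):
    """Stand-in for the project's `brf` helper module."""

    @staticmethod
    def id_from_url(url):
        # Assume TFRRS urls contain a numeric id, e.g. .../results/12345.html
        match = re.search(r'(\d+)', url)
        return int(match.group(1)) if match else None

    @staticmethod
    def clean_int(value):
        # Keep only the digits (e.g. '1.' -> 1); fall back to None
        digits = re.sub(r'\D', '', value)
        return int(digits) if digits else None

    @staticmethod
    def format_mark(value):
        # Pass marks through unchanged; the real helper likely normalizes times
        return value.strip()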
Example #2
import urllib

import BeautifulSoup

# `team_name`, `heat_types`, and the `brf` helper module are defined elsewhere
# in this project.


def make_performances_for_meet(meet_id, sex):
    """Scrape one TFRRS results page and return this team's performances."""

    # GET THE PAGE (urllib.urlopen raises IOError on failed requests in Python 2)
    success = False
    while not success:
        try:
            tfrrs_html = urllib.urlopen('http://www.tfrrs.org/results/{}_{}.html'.format(meet_id, sex))
            success = True
        except IOError:
            print('Bad request. Retrying')
    soup = BeautifulSoup.BeautifulSoup(tfrrs_html.read())

    # GET EVENTS: map each event's anchor id to its display name
    page_events = soup.find('div', {'class': 'eventname'}).find('li').findAll('a')
    events = {}
    for e in page_events:
        events[e['href'].replace('#', '')] = e.text

    # DOM ELEMENTS -> DICTIONARIES
    event_tables = soup.findAll('div', {'class': 'data'})
    cur_event_id = None
    cur_round_num = None
    heat_type = None
    ps = []
    # for each event table on the page
    for t in event_tables:
        # the table id encodes the event id, round number, and heat number
        e_parts = t['id'].rsplit('_')
        event_id, round_num, heat = e_parts[0], e_parts[1], e_parts[2]
        # a new event resets the round counter; a new round within the same
        # event advances it
        if event_id != cur_event_id:
            heat_type = 1
        elif round_num != cur_round_num:
            heat_type += 1
        cur_event_id = event_id
        cur_round_num = round_num
        # for each performance row in the table
        rows = t.findAll('tr')
        for r in rows:
            # save the event data
            p = {'event': events[event_id],
                 'heat_type': heat_types[heat_type],
                 'heat': heat}
            # extract the column data, keyed by the cell's first CSS class
            cols = r.findAll('td')
            for c in cols:
                if c.has_key('class'):
                    attr = c['class'].rsplit(' ')[0]
                    p[attr] = c.text
                    # include link urls (e.g. the athlete's profile page)
                    link = c.find('a')
                    if link is not None:
                        p[attr + '_link'] = link['href']
            ps.append(p)

    # CLEAN THE DICTIONARIES
    performances = []
    for p in ps:
        # keep only rows for this team that link to an athlete page
        if p.has_key('team') and p['team'] == team_name and p.has_key('name_link'):
            performances.append({
                'event_name': str(p['event']),
                'tfrrs_athlete_id': brf.id_from_url(p['name_link']),
                'place': brf.clean_int(p['place']),
                'mark': brf.format_mark(p['time']),
                'tfrrs_meet_id': int(meet_id),
                'heat_type': str(p['heat_type']),
                'heat': int(p['heat']),
            })

    return performances
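
A minimal driver showing how the two functions might be combined, assuming the module-level context sketched after Example #1; the 'm'/'f' url suffixes are a guess at the TFRRS naming convention, not something confirmed by the snippets.

# Hypothetical usage -- the sex codes and iteration order are assumptions.
if __name__ == '__main__':
    all_performances = []
    for meet_id in get_all_meets():
        for sex in ('m', 'f'):
            all_performances.extend(make_performances_for_meet(meet_id, sex))
    print('Scraped {} performances'.format(len(all_performances)))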