async def load_2009(cls):
    year = 2009
    with open("data/old_champs/2009-2010/2009CMPresultsandrankings.html") as f:
        data = f.read()
    with open("data/old_champs/2009-2010/awards2") as f:
        awards_data = f.read()
    soup = BeautifulSoup(data, 'lxml')
    tables = list(soup.find_all("table"))
    finals = ResultsPageHelper.load_matches(tables[0], "0910cmp0")
    franklin = ResultsPageHelper.load_matches(tables[1], "0910cmp1")
    edison = ResultsPageHelper.load_matches(tables[2], "0910cmp2")
    # has_hs=False: these early rankings tables lack the high-score column
    franklin_rank = ResultsPageHelper.load_rankings(tables[3], franklin, has_hs=False)
    edison_rank = ResultsPageHelper.load_rankings(tables[4], edison, has_hs=False)
    # mk_champs() returns the division events with the finals event last,
    # so the championship awards hang off events[-1]
    events = cls.mk_champs(year, "2010-04-14", "2010-04-17")
    awards = cls.load_awards_file(awards_data, year, events[-1].key)
    await cls.finalize([finals, franklin, edison, franklin_rank, edison_rank,
                        events, awards], events, year)
async def scrape_event(cls, url, event):
    main_data = await cls.get(url)
    rankings_data = await cls.get(url + "/team-rankings")
    match_details_data = await cls.get(url + "/match-results-details")
    soup = BeautifulSoup(main_data, 'lxml')
    awards = cls.load_awards(soup, event)
    match_details_soup = BeautifulSoup(match_details_data, 'lxml')
    # the results table is the one containing the page's first <th>
    match_details_table = match_details_soup.find("th").find_parent("table")
    matches = ResultsPageHelper.load_match_details(match_details_table, event.key)
    rankings_soup = BeautifulSoup(rankings_data, 'lxml')
    ranking_table = rankings_soup.find("th").find_parent("table")
    rankings = ResultsPageHelper.load_rankings(ranking_table, matches)
    return awards, rankings, matches
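# Hypothetical usage of scrape_event() (the URL is illustrative, not a real
# event page; `event` is an already-constructed Event whose key the match
# loader stamps onto each match):
#
#   awards, rankings, matches = await cls.scrape_event(
#       "http://www.ftcpenn.org/ftc-events/2016-2017-season/example-qualifier",
#       event)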
async def load_2012(cls):
    year = 2012
    with open("data/old_champs/2012-2013/Match_Results_World Championship_Edison.html") as f:
        edison = ResultsPageHelper.load_matches(
            BeautifulSoup(f.read(), 'lxml').find("table"), "1213cmp1")
    with open("data/old_champs/2012-2013/Match_Results_World Championship_Franklin.html") as f:
        franklin = ResultsPageHelper.load_matches(
            BeautifulSoup(f.read(), 'lxml').find("table"), "1213cmp2")
    with open("data/old_champs/2012-2013/finals.html") as f:
        finals = ResultsPageHelper.load_matches(
            BeautifulSoup(f.read(), 'lxml').find("table"), "1213cmp0")
    with open("data/old_champs/2012-2013/Rankings_World Championship_Edison.html") as f:
        edison_rank = ResultsPageHelper.load_rankings(
            BeautifulSoup(f.read(), 'lxml').find("table"), edison)
    with open("data/old_champs/2012-2013/Rankings_World Championship_Franklin.html") as f:
        franklin_rank = ResultsPageHelper.load_rankings(
            BeautifulSoup(f.read(), 'lxml').find("table"), franklin)
    with open("data/old_champs/2012-2013/awards") as f:
        awards = cls.load_awards_file(f.read(), year, '1213cmp0')
    events = cls.mk_champs(year, "2013-04-24", "2013-04-27")
    await cls.finalize([finals, franklin, edison, franklin_rank, edison_rank,
                        events, awards], events, year)
async def load_1516resq(cls):
    with open("data/ftc-data/events/1516resq/1516resq-event-list.csv") as f:
        csv_reader = csv.reader(f.read().split("\n"))
    finals = None
    for row in csv_reader:
        if not row:
            continue
        sdate = list(map(int, row[0].split("/")))
        date = datetime.datetime(year=sdate[2], month=sdate[0], day=sdate[1])
        (name, state, fevent_type, divno, region_code,
         ecode, divid, ftcdata_code) = [a.strip() for a in row[1:9]]
        divno = int(divno)
        if region_code in ("pa", "esr"):
            # ftcpenn loads this better than ftcdata ever did (oops!)
            continue
        event_type = cls.EVENT_TYPE_MAP[fevent_type]
        event = None
        rcode = "ERROR"
        region = "ERROR"
        country = "USA"
        # append "Division" to the end of division event names
        if divid != 'x' and not name.endswith("Division"):
            name += " Division"
        if event_type == EventType.WORLD_CHAMPIONSHIP:
            franklin, edison, finals = OldChamps.mk_champs(2015, "2016-04-27",
                                                           "2016-04-30")
            if ecode == 'ed':
                event = edison
            elif ecode == 'fr':
                event = franklin
            await cls.load_resq_finals(finals)
        elif event_type == EventType.SUPER_REGIONAL:
            rcode = region_code
        else:
            region, rcode = cls.resq_region(name, state, ecode, region_code)
            if state == "Canada":
                state, country = "Alberta", "Canada"
        if ecode.startswith("cmp") and len(ecode) > 3:
            ecode = ecode[3:] + "cmp"
        if event_type == EventType.REGIONAL_CMP:
            ecode = "cmp"
        if event is None:
            event = Event(key=f"1516{rcode}{ecode}",
                          year=2015,
                          name=name,
                          state_prov=state,
                          country=country,
                          start_date=date,
                          end_date=date,
                          event_type=event_type,
                          playoff_type=PlayoffType.STANDARD)
            if event_type != EventType.SUPER_REGIONAL:
                event.region = region
            if divid != 'x':
                event.key += str(divno)
        event.event_code = ftcdata_code
        with open(f"data/ftc-data/events/1516resq/{region_code.lower()}/"
                  f"1516resq-{ftcdata_code}-MatchResultsDetails.html") as f:
            matches = ResultsPageHelper.load_match_details(
                BeautifulSoup(f.read(), 'lxml'), event.key)
        with open(f"data/ftc-data/events/1516resq/{region_code.lower()}/"
                  f"1516resq-{ftcdata_code}-Rankings.html") as f:
            rankings = ResultsPageHelper.load_rankings(
                BeautifulSoup(f.read(), 'lxml'), matches)
        await EventHelper.insert_event(event, matches, rankings, None,
                                       tolerate_missing_finals=True,
                                       data_source="cheer4ftc ftc-data repository")
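# For reference, a 1516resq-event-list.csv row as the unpacking above expects
# it (the sample values here are fabricated for illustration):
#
#   2/13/2016,Example Qualifier,Ohio,Q,0,oh,q5,x,ohq5
#
# i.e. date, then name, state, ftc-data event type (mapped through
# EVENT_TYPE_MAP), division number, region code, event code, division id
# ('x' when the event has no divisions), and the ftc-data file code used to
# locate the results HTML on disk.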
async def load_meets(cls, year):
    base_url = f"http://www.ftcpenn.org/ftc-events/{year}-{year+1}-season"
    if year == 2014:
        match_url = base_url + "/philadelphia-area-league-meets/match-results-details"
        rankings_url = base_url + "/philadelphia-area-league-meets/team-rankings"
        match_tables = BeautifulSoup(await cls.get(match_url), 'lxml').find(
            class_="sites-layout-tile sites-tile-name-content-1").find_all("table")
        rankings_tables = BeautifulSoup(await cls.get(rankings_url), 'lxml').find(
            class_="sites-layout-tile sites-tile-name-content-1").find_all("table")
        dates = [mkdate(2014, 12, 10), mkdate(2014, 12, 11),
                 mkdate(2015, 1, 14), mkdate(2015, 1, 15)]
        for i, match_table, rankings_table in zip(range(1, 5), match_tables,
                                                  rankings_tables):
            event_key = f"1415paphlm{i}"
            print("Processing", event_key)
            event = Event(key=event_key,
                          year=year,
                          name=f"Philadelphia Area League - Meet {i}",
                          city="Philadelphia",
                          state_prov="Pennsylvania",
                          country="USA",
                          start_date=dates[i - 1],
                          end_date=dates[i - 1],
                          event_type=EventType.MEET,
                          region="Pennsylvania",
                          advances_to="1415paphlcmp",
                          venue="Temple University College of Engineering Building",
                          address="1947 N 12th St. Philadelphia, PA",
                          website="https://www.temple.edu",
                          league_key="1314paphl",
                          playoff_type=PlayoffType.STANDARD)
            matches = ResultsPageHelper.load_match_details(match_table, event.key)
            rankings = ResultsPageHelper.load_rankings(rankings_table, matches)
            await EventHelper.insert_event(event, matches, rankings, None,
                                           data_source="ftcpenn.org")
async def load_resq_finals(cls, finals):
    with open("data/old_champs/2015-2016/finals.html") as f:
        matches = ResultsPageHelper.load_matches(
            BeautifulSoup(f.read(), 'lxml').find("table"), "1516cmp0")
    # each entry is a (Match, red MatchScore, blue MatchScore) tuple
    for match, red, blue in matches:
        await match.upsert()
        await red.upsert()
        await blue.upsert()
    finals.data_sources = ["FTCData Original Research"]
    await finals.upsert()
    await AwardHelper.generate_winners_finalists(finals)
def load_matches(cls, data, event_key):
    matches = []
    for m in data["matches"]:
        comp_level, mnum, set_number = ResultsPageHelper.parse_match_code(m['number'])
        match = Match(event_key=event_key, comp_level=comp_level,
                      match_number=mnum, set_number=set_number)
        match.gen_keys()
        red_score, blue_score = m['scores']['red'], m['scores']['blue']
        if red_score > blue_score:
            match.winner = "red"
        elif blue_score > red_score:
            match.winner = "blue"
        else:
            match.winner = "tie"
        red = MatchScore(key=match.red_key, alliance_color="red",
                         event_key=event_key, match_key=match.key,
                         dqed=[], teams=[], surrogates=[], total=red_score)
        blue = MatchScore(key=match.blue_key, alliance_color="blue",
                          event_key=event_key, match_key=match.key,
                          dqed=[], teams=[], surrogates=[], total=blue_score)
        for team in m['teams']['red']:
            red.teams.append(f'ftc{team["number"]}')
            if team['surrogate']:
                red.surrogates.append(f'ftc{team["number"]}')
        for team in m['teams']['blue']:
            blue.teams.append(f'ftc{team["number"]}')
            if team['surrogate']:
                blue.surrogates.append(f'ftc{team["number"]}')
        red.auto = m['subscoresRed']['auto']
        red.teleop = m['subscoresRed']['tele']
        red.endgame = m['subscoresRed']['endg']
        red.penalty = m['subscoresRed']['pen']
        blue.auto = m['subscoresBlue']['auto']
        blue.teleop = m['subscoresBlue']['tele']
        blue.endgame = m['subscoresBlue']['endg']
        blue.penalty = m['subscoresBlue']['pen']
        # TODO: load match details (if available)
        matches.append((match, red, blue))
    return matches
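# A minimal sketch of the ftcscores payload shape that load_matches() above
# consumes, inferred purely from the field accesses in the loop; the real API
# response carries more fields, and the match-number format accepted by
# ResultsPageHelper.parse_match_code() is an assumption here.
_EXAMPLE_FTCSCORES_EVENT = {
    "matches": [{
        "number": "Q-1",  # assumed qualification-match code format
        "scores": {"red": 120, "blue": 95},
        "teams": {
            "red": [{"number": 1234, "surrogate": False},
                    {"number": 5678, "surrogate": False}],
            "blue": [{"number": 9012, "surrogate": False},
                     {"number": 3456, "surrogate": True}],
        },
        "subscoresRed": {"auto": 40, "tele": 60, "endg": 20, "pen": 0},
        "subscoresBlue": {"auto": 30, "tele": 50, "endg": 15, "pen": 0},
    }],
}
# e.g. matches = cls.load_matches(_EXAMPLE_FTCSCORES_EVENT, "1617nyc1")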
async def load_2014(cls):
    year = 2014
    # Edison division: qualification matches, then eliminations
    with open("data/old_champs/2014-2015/MatchResultsDetails_World_Championship_Edison_T.html") as f:
        edison = ResultsPageHelper.load_match_details(
            BeautifulSoup(f.read(), 'lxml').find("table"), "1415cmp2")
    with open("data/old_champs/2014-2015/MatchResultsDetails_World_Championship_Edison_Elim.html") as f:
        edison.extend(ResultsPageHelper.load_match_details(
            BeautifulSoup(f.read(), 'lxml').find("table"), "1415cmp2"))
    # Franklin division
    with open("data/old_champs/2014-2015/MatchResultsDetails_World_Championship_Franklin_T.html") as f:
        franklin = ResultsPageHelper.load_match_details(
            BeautifulSoup(f.read(), 'lxml').find("table"), "1415cmp1")
    with open("data/old_champs/2014-2015/MatchResultsDetails_World_Championship_Franklin_Elim.html") as f:
        franklin.extend(ResultsPageHelper.load_match_details(
            BeautifulSoup(f.read(), 'lxml').find("table"), "1415cmp1"))
    # finals
    with open("data/old_champs/2014-2015/MatchResultsDetails_World_Championship_Finals.html") as f:
        finals = ResultsPageHelper.load_match_details(
            BeautifulSoup(f.read(), 'lxml').find("table"), "1415cmp0")
    # rankings
    with open("data/old_champs/2014-2015/Rankings_World_Championship_Edison.html") as f:
        edison_rank = ResultsPageHelper.load_rankings(
            BeautifulSoup(f.read(), 'lxml').find("table"), edison)
    with open("data/old_champs/2014-2015/Rankings_World_Championship_Franklin.html") as f:
        franklin_rank = ResultsPageHelper.load_rankings(
            BeautifulSoup(f.read(), 'lxml').find("table"), franklin)
    with open("data/old_champs/2014-2015/awards") as f:
        awards = cls.load_awards_file(f.read(), year, '1415cmp0')
    events = cls.mk_champs(year, "2015-04-22", "2015-04-25")
    await cls.finalize([finals, franklin, edison, franklin_rank, edison_rank,
                        events, awards], events, year)
async def load_2011(cls):
    year = 2011
    with open("data/old_champs/2011-2012/2011-2012FTCCMPResults") as f:
        data = f.read()
    with open("data/old_champs/2011-2012/awards") as f:
        awards_data = f.read()
    soup = BeautifulSoup(data, 'lxml')
    tables = list(soup.find_all("table"))
    finals = ResultsPageHelper.load_matches(tables[3], "1112cmp0")
    franklin = ResultsPageHelper.load_matches(tables[15], "1112cmp1")
    edison = ResultsPageHelper.load_matches(tables[14], "1112cmp2")
    franklin_rank = ResultsPageHelper.load_rankings(tables[13], franklin)
    edison_rank = ResultsPageHelper.load_rankings(tables[12], edison)
    events = cls.mk_champs(year, "2012-04-25", "2012-04-28")
    awards = cls.load_awards_file(awards_data, year, events[-1].key)
    await cls.finalize([finals, franklin, edison, franklin_rank, edison_rank,
                        events, awards], events, year)
async def load_2010(cls):
    year = 2010
    with open("data/old_champs/2010-2011/2010-2011-ftc-world-championship-get-over-it!-results.html") as f:
        data = f.read()
    with open("data/old_champs/2010-2011/awards") as f:
        awards_data = f.read()
    soup = BeautifulSoup(data, 'lxml')
    tables = list(soup.find_all("table"))
    finals = ResultsPageHelper.load_matches(tables[0], "1011cmp0")
    edison = ResultsPageHelper.load_matches(tables[1], "1011cmp1")
    franklin = ResultsPageHelper.load_matches(tables[2], "1011cmp2")
    edison_rank = ResultsPageHelper.load_rankings(tables[3], edison)
    franklin_rank = ResultsPageHelper.load_rankings(tables[4], franklin)
    events = cls.mk_champs(year, "2011-04-27", "2011-04-30")
    awards = cls.load_awards_file(awards_data, year, events[-1].key)
    await cls.finalize([finals, franklin, edison, franklin_rank, edison_rank,
                        events, awards], events, year)
def load_rankings(cls, data, matches, event_key):
    # since ftcscores has basically everything we need, we just load it right in!
    _, wlt = ResultsPageHelper.highscores_wlt(matches)
    rankings = []
    for r in data['rankings']:
        c = r['current']
        tkey = f"ftc{r['number']}"
        twlt = wlt[tkey]
        ranking = Ranking(event_key=event_key,
                          team_key=tkey,
                          rank=r['rank'],
                          qp_rp=c['qp'],
                          rp_tbp=c['rp'],
                          high_score=c['highest'],
                          wins=twlt[0],
                          losses=twlt[1],
                          ties=twlt[2],
                          dqed=0,
                          played=c['matches'])
        rankings.append(ranking)
    return rankings
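# The matching data['rankings'] entry shape, again inferred from the accesses
# above (values fabricated for illustration):
#
#   {"number": 1234, "rank": 1,
#    "current": {"qp": 18, "rp": 220, "highest": 310, "matches": 9}}
#
# Note that W-L-T records are deliberately not taken from ftcscores; they are
# recomputed from the loaded matches via ResultsPageHelper.highscores_wlt().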
async def load_year(cls, year):
    if year not in range(2013, 2018):
        raise ValueError("invalid year!")
    url = f"http://www.ftceast.org/tournament/tournament-results/{year}-{year+1}-Results"
    if year == 2013:
        date = datetime.datetime(year=2014, month=4, day=3)
    else:
        # day = 2033 - year gives March 19/18/17/16 of the following year
        # for the 2014-2017 seasons
        date = datetime.datetime(year=year + 1, month=3, day=2033 - year)
    url = url.lower()
    main_data = await cls.get(url)
    common_info = {
        "year": year,
        "city": "Scranton",
        "state_prov": "Pennsylvania",
        "country": "USA",
        "start_date": date,
        "end_date": date + datetime.timedelta(days=2),
        "event_type": EventType.SUPER_REGIONAL,
        "venue": "University of Scranton",
        "address": "",
        "website": "http://www.ftceast.org",
    }
    season = f"{year % 100:02}{(year + 1) % 100:02}"
    finals = Event(key=f"{season}esr0",
                   name="FTC East Super Regional Championship",
                   playoff_type=PlayoffType.BO3_FINALS,
                   division_keys=[f"{season}esr1", f"{season}esr2"],
                   **common_info)
    hopper = Event(key=f"{season}esr1",
                   name="FTC East Super Regional Championship - Hopper Division",
                   playoff_type=PlayoffType.STANDARD,
                   parent_event_key=f"{season}esr0",
                   **common_info)
    tesla = Event(key=f"{season}esr2",
                  name="FTC East Super Regional Championship - Tesla Division",
                  playoff_type=PlayoffType.STANDARD,
                  parent_event_key=f"{season}esr0",
                  **common_info)
    soup = BeautifulSoup(main_data, 'lxml')
    # the 2013 site uses slightly different page paths than later years
    suffix = "" if year == 2013 else "-details"
    rank_page = "-ranking-list" if year == 2013 else "-team-rankings"
    awards = cls.load_awards(soup, finals)
    finals_details_data = await cls.get(url + "/finals-match-results" + suffix)
    match_details_table = BeautifulSoup(finals_details_data,
                                        'lxml').find("th").find_parent("table")
    finals_matches = ResultsPageHelper.load_match_details(match_details_table,
                                                          finals.key)
    hopper_details_data = await cls.get(url + "/hopper-match-results" + suffix)
    match_details_table = BeautifulSoup(hopper_details_data,
                                        'lxml').find("th").find_parent("table")
    hopper_matches = ResultsPageHelper.load_match_details(match_details_table,
                                                          hopper.key)
    hopper_rank_data = await cls.get(url + "/hopper" + rank_page)
    hopper_rank_table = BeautifulSoup(hopper_rank_data,
                                      'lxml').find("th").find_parent("table")
    hopper_rank = ResultsPageHelper.load_rankings(hopper_rank_table, hopper_matches)
    tesla_details_data = await cls.get(url + "/tesla-match-results" + suffix)
    match_details_table = BeautifulSoup(tesla_details_data,
                                        'lxml').find("th").find_parent("table")
    tesla_matches = ResultsPageHelper.load_match_details(match_details_table,
                                                         tesla.key)
    tesla_rank_data = await cls.get(url + "/tesla" + rank_page)
    tesla_rank_table = BeautifulSoup(tesla_rank_data,
                                     'lxml').find("th").find_parent("table")
    tesla_rank = ResultsPageHelper.load_rankings(tesla_rank_table, tesla_matches)
    await EventHelper.insert_event(hopper, hopper_matches, hopper_rank, None,
                                   data_source="ftceast.org")
    await EventHelper.insert_event(tesla, tesla_matches, tesla_rank, None,
                                   data_source="ftceast.org")
    await EventHelper.insert_event(finals, finals_matches, None, awards,
                                   divisions=[hopper, tesla],
                                   data_source="ftceast.org")
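# For year=2016, for example, the lowercased URLs fetched above resolve to
# http://www.ftceast.org/tournament/tournament-results/2016-2017-results plus
# the per-page paths /finals-match-results-details, /hopper-match-results-details,
# /hopper-team-rankings, /tesla-match-results-details, and /tesla-team-rankings.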
async def load_1617velv(cls):
    tasks = []
    with open("data/ftc-data/events/1617velv/1617velv-event-list.csv") as f:
        csv_reader = csv.reader(f.read().split("\n"))
    for row in csv_reader:
        if not row:
            continue
        sdate = list(map(int, row[0].split("/")))
        date = datetime.datetime(year=sdate[2], month=sdate[0], day=sdate[1])
        (name, state, fevent_type, _, region_code, ecode, divid,
         ftcdata_code, state_abbr, data_quality) = [a.strip() for a in row[1:]]
        if region_code in ("pa", "esr"):
            # ftcpenn loads this better than ftcdata ever did because it
            # provides awards data; there's no point in us covering it
            continue
        event_type = cls.EVENT_TYPE_MAP[fevent_type]
        if state.endswith(" SR"):
            event_type = EventType.SUPER_REGIONAL
        elif state.startswith("CMP "):
            event_type = EventType.WORLD_CHAMPIONSHIP
        divno = -1
        # division events have codes ending in "d0"/"d1"/"d2"; strip the
        # division number off the event code
        if ecode.endswith(("d0", "d1", "d2")):
            divno = int(ecode[-1])
            ecode = ecode[:-2]
            #name += " Division"
        region = None
        rcode = {
            "txno": "txntx",
            "txwp": "txph",
            "nynyc": "nyc",
            "io": "ia",
            "nm": "az",
        }.get(region_code, region_code)
        if ecode == "cmphs":
            rcode = "mihs"
        elif rcode in ("wsr", "nsr", "ssr", "cmptx", "cmpmo"):
            ecode = ""
        if ecode:
            region = await RegionHelper.region_unabbrev(rcode)
        country = "Canada" if "Canada" in name else "USA"
        event = Event(key=f"1617{rcode}{ecode}",
                      year=2016,
                      name=name,
                      state_prov=state,
                      country=country,
                      start_date=date,
                      end_date=date,
                      event_type=event_type,
                      region=region,
                      playoff_type=PlayoffType.STANDARD)
        if divno > -1:
            if divno == 0:
                event.division_keys = [event.key + "1", event.key + "2"]
            else:
                event.parent_event_key = event.key + "0"
            event.key += str(divno)
        event.event_code = ftcdata_code
        base = f"data/ftc-data/events/1617velv/{region_code.lower()}/1617velv-{ftcdata_code}"
        if os.path.exists(base + "-MatchResultsDetails.html"):
            with open(base + "-MatchResultsDetails.html") as f:
                matches = ResultsPageHelper.load_match_details(
                    BeautifulSoup(f.read(), 'lxml'), event.key)
            # a raw scores CSV, when present, supplements the HTML match details
            if os.path.exists(base + "-MatchResultsRaw.csv"):
                MatchDetailsHelper.parse_ftcdata_csv(matches, base + "-MatchResultsRaw.csv")
        elif os.path.exists(base + "-MatchResults.html"):
            with open(base + "-MatchResults.html") as f:
                matches = ResultsPageHelper.load_matches(
                    BeautifulSoup(f.read(), 'lxml'), event.key)
        else:
            print("warning:", event.key, "has no match results, skipping!")
            continue
        with open(base + "-Rankings.html") as f:
            rankings = ResultsPageHelper.load_rankings(
                BeautifulSoup(f.read(), 'lxml'), matches)
        tasks.append(asyncio.create_task(EventHelper.insert_event(
            event, matches, rankings, None, tolerate_missing_finals=True,
            data_source="cheer4ftc ftc-data repository")))
    await asyncio.gather(*tasks)
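# For reference, a 1617velv-event-list.csv row as the unpacking above expects
# it (sample values fabricated for illustration): after the date, the columns
# are name, state, ftc-data event type, an unused field, region code, event
# code, division id, ftc-data file code, state abbreviation, and a
# data-quality marker.
#
#   2/11/2017,Example Qualifier,Ohio,Q,?,oh,q5,x,ohq5,OH,ok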