def collect(teamNum):
    """Average a team's first three max scores and OPRs from vexdb.io.

    Fetches the team's rankings page, reads the ``.max_score`` and ``.opr``
    cells, drops the first matched cell of each (the table header) and
    averages the first three remaining numeric entries.

    Args:
        teamNum: Team identifier interpolated into the vexdb.io team URL.

    Returns:
        A two-element list ``[mean_max_score, mean_opr]``. An entry is NaN
        when the page yields no numeric value for that column.
    """
    scores = Raschietto.from_url(
        "https://vexdb.io/teams/view/{}?t=rankings".format(teamNum))

    def numeric_column(selector, cast):
        # The first matched cell is the column header, not data.
        cells = Matcher(selector)(scores, multiple=True) or []
        values = []
        for cell in cells[1:]:
            try:
                values.append(cast(cell))
            except (TypeError, ValueError):
                # Skip a cell that does not parse instead of leaving the
                # whole column as strings, which would break .mean() below.
                continue
        return values

    max_scores = pd.Series(data=numeric_column('.max_score', int), dtype=float)
    oprs = pd.Series(data=numeric_column('.opr', float), dtype=float)
    return [max_scores[0:3].mean(), oprs[0:3].mean()]
def collectRanking(teamNum:str, url:str=''):
    """Look up a team's rank on a vexdb.io event rankings page.

    Args:
        teamNum: Team number exactly as it appears in the page's
            ``.number`` cells.
        url: Rankings page URL. When empty, the URL is derived from the
            Robot Events URL on the first line of ``data/urls.txt`` and
            then recorded in that file.

    Returns:
        The ``.rank`` cell at the same position as ``teamNum``, or ``0``
        when the stored URL does not reduce to an ``RE-VRC...`` event code.

    Raises:
        ValueError: If ``teamNum`` is not listed on the page.
    """
    if url == '':
        with open('data/urls.txt', 'r') as filehandle:
            stored_lines = filehandle.read().split("\n")
        # Only the first line is the Robot Events URL. Later lines are the
        # vexdb URLs appended below by earlier calls; reading the whole file
        # would glue them onto the event code.
        event_code = stored_lines[0]
        event_code = event_code.replace("https://www.robotevents.com/robot-competitions/vex-robotics-competition/", '')
        event_code = event_code.replace(".html", '').strip()
        if not event_code.startswith("RE-VRC"):
            print("Invalid URL. Try updating again or try 'rankings <vexdb>'.")
            return 0
        url = 'https://vexdb.io/events/view/' + event_code + '?t=rankings'
        if url not in stored_lines:
            # Record the derived URL once rather than on every call.
            with open('data/urls.txt', 'a+') as filehandle2:
                filehandle2.write("\n" + url)

    page = Raschietto.from_url(url)

    rankNums = list(Matcher('.rank')(page, multiple=True) or [])
    if "Rank" in rankNums:
        rankNums.remove("Rank")  # drop the header cell

    teamNums = list(Matcher('.number')(page, multiple=True) or [])
    if "Number" in teamNums:
        teamNums.remove("Number")  # drop the header cell

    # Rank and number cells are matched in page order, so they share indices.
    return rankNums[teamNums.index(teamNum)]
def collectRankRE():
    """Print the ``h4`` elements matched on the stored Robot Events page.

    The page URL is taken from the first line of ``data/urls.txt``. Every
    element matching ``#division-1 > div > div.col-md-4 > h4`` is collected
    and the resulting list is printed. Nothing is returned.
    """
    with open('data/urls.txt', 'r') as filehandle:
        stored_lines = filehandle.read().split('\n')
    page = Raschietto.from_url(stored_lines[0])
    heading_matcher = Matcher('#division-1 > div > div.col-md-4 > h4')
    print(heading_matcher(page, multiple=True))
def update(self):
    """Get the latest data from CNN.

    Fetches ``self._resource`` and stores every ``.wsod_bold.wsod_aRight``
    cell in ``self.data``. The fetch is skipped only when data is already
    cached and today is ISO weekday 6 (Saturday). If the page yields no
    matching cells, the previous ``self.data`` is kept.
    """
    from raschietto import Raschietto, Matcher

    today = datetime.today()
    # NOTE(review): the original condition also had
    # `or (isoweekday() == 7 and hour > 17)`, which `isoweekday() != 6`
    # already covers, so it is dropped here with no change in behavior.
    # The intent was probably "skip weekends until Sunday evening"; confirm.
    if self.data is not None and today.isoweekday() == 6:
        return

    page = Raschietto.from_url(self._resource)
    _LOGGER.debug("CNN page loaded")
    futures = Matcher(".wsod_bold.wsod_aRight")(page, multiple=True)
    if not futures:
        # Previously an unused `futures[0].split(...)` raised IndexError here.
        _LOGGER.warning("No futures data found on CNN page")
        return
    self.data = futures
def collect(data, team_num):
    """Scrape a team's first seven ranks, max scores and OPRs from vexdb.io.

    For each of the ``.rank``, ``.max_score`` and ``.opr`` columns, the
    first matched cell (the table header) is dropped and the next seven
    cells are converted to numbers.

    Args:
        data: Unused; kept for interface compatibility.
        team_num: Team identifier interpolated into the vexdb.io team URL.

    Returns:
        ``[ranks, max_scores, oprs]``. ``ranks`` is a list of ints. The
        other two are lists of ints / floats, or the full unconverted cell
        list (header removed) if any of the first seven cells fails to
        parse.

    Raises:
        ValueError: If a rank cell among the first seven is not an integer.
    """
    scores = Raschietto.from_url(
        "https://vexdb.io/teams/view/{}?t=rankings".format(team_num))

    def column(selector, cast, strict):
        cells = list(Matcher(selector)(scores, multiple=True) or [])
        rows = cells[1:]  # first matched cell is the column header
        try:
            return [cast(cell) for cell in rows[:7]]
        except (TypeError, ValueError):
            if strict:
                raise
            # Best effort: hand the raw cells back rather than failing.
            return rows

    return [
        column('.rank', int, True),
        column('.max_score', int, False),
        column('.opr', float, False),
    ]
def extract_arxiv_data(url):
    """Scrape title, authors, year and id from an arXiv abstract page.

    Args:
        url: URL of the abstract page; the text following ``abs`` plus one
            separator character is used as the arXiv id.

    Returns:
        A dict with keys ``"title"``, ``"authors"`` (list of names),
        ``"year"`` (four-digit string, or ``""`` when the dateline holds no
        year) and ``"arxiv_id"``.
    """
    import re

    page = Raschietto.from_url(url)

    # The slices strip the leading label text from each element; the label
    # wording itself is not visible in this block.
    title_matcher = Matcher(
        "#abs > h1",
        mapping=lambda element, page: Raschietto.element_to_text(element)[6:])
    authors_matcher = Matcher(
        "#abs > div.authors",
        mapping=lambda element, page:
            Raschietto.element_to_text(element)[8:].split(", "))
    dateline_matcher = Matcher(
        "#abs > div.dateline",
        mapping=lambda element, page: Raschietto.element_to_text(element))

    paper_data = {
        "title": title_matcher(page),
        "authors": authors_matcher(page),
    }

    dateline = dateline_matcher(page) or ""
    # Take the last standalone four-digit year. Searching for the substring
    # "20" hit the *second* "20" of years such as 2020 and never found 19xx.
    years = re.findall(r"(?<!\d)(?:19|20)\d{2}(?!\d)", dateline)
    paper_data["year"] = years[-1] if years else ""

    paper_data["arxiv_id"] = url[url.find("abs") + 4:]
    return paper_data
time.sleep(0.2) choice = '' url = '' spreadsheet='' status = '5327B' while choice != 'exit': with open('data/urls.txt', 'r') as filehandle: info = filehandle.read() info = info.split("\n") try: robotevents = info[0] page = Raschietto.from_url(robotevents) title = Matcher('.panel-title') title = title(page, multiple=False) status = title except: print("No Robot Events URL Entered. To fix this issue, run 'update <url>'") choice = input('GaelScout: '+status+'$ ') if choice.startswith("help") == True: print("") print("Current commands:") print("about (gives brief overview)") print("update <robot-events-url>") print("rankings <vex-db> (if used after update, vexdb url is unnecessary)") print("current (displays currently stored tournament)") print("add <team-number> (adds team number to the database to be analyzed)")
def rankings(url:str=''):
    """Scrape an event's rankings table from vexdb.io and export it.

    The table is written to ``data/rankings.xlsx`` (sheet ``Rankings``) and
    ``data/rankings.pkl``; on macOS and Windows the spreadsheet is then
    opened.

    Args:
        url: Rankings page URL. When empty, the URL is derived from the
            Robot Events URL on the first line of ``data/urls.txt`` and
            then recorded in that file.

    Returns:
        ``0`` when the stored URL does not reduce to an ``RE-VRC...`` event
        code, otherwise ``None``.
    """
    if url == '':
        with open('data/urls.txt', 'r') as filehandle:
            stored_lines = filehandle.read().split("\n")
        # Only the first line is the Robot Events URL. Later lines are the
        # vexdb URLs appended below by earlier calls; reading the whole file
        # would glue them onto the event code.
        event_code = stored_lines[0]
        event_code = event_code.replace("https://www.robotevents.com/robot-competitions/vex-robotics-competition/", '')
        event_code = event_code.replace(".html", '').strip()
        if not event_code.startswith("RE-VRC"):
            print("Invalid URL. Try updating again or try 'rankings <vexdb>'.")
            return 0
        url = 'https://vexdb.io/events/view/' + event_code + '?t=rankings'
        if url not in stored_lines:
            # Record the derived URL once rather than on every call.
            with open('data/urls.txt', 'a+') as filehandle2:
                filehandle2.write("\n" + url)

    page = Raschietto.from_url(url)

    # (output column, CSS selector, header texts to strip from the cells)
    column_specs = [
        ("Rank", '.rank', ("Rank",)),
        ("Numbers", '.number', ("Number",)),
        ("Name", '.name>a', ("Name",)),
        ("W-L-T", '.wlt', ("W-L-T",)),
        ("WPSP", '.wpsp', ("WP / AP / SP",)),
        ("Max Score", '.max_score', ("Max Score",)),
        ("TRSP", '.trsp', ("TRSPs", "TRSPs ")),
        ("OPR", '.opr', ("OPR", "OPR ")),
        ("DPR", '.dpr', ("DPR", "DPR ")),
        ("CCWM", '.ccwm', ('CCWM', 'CCWM ')),
    ]

    columns = {}
    for label, selector, header_texts in column_specs:
        cells = list(Matcher(selector)(page, multiple=True) or [])
        for header_text in header_texts:
            if header_text in cells:
                cells.remove(header_text)
        columns[label] = pd.Series(data=cells)

    overallRankings = pd.DataFrame(columns)
    overallRankings = overallRankings[[label for label, _, _ in column_specs]]

    # Passing the path lets pandas save and close the workbook. The original
    # created an ExcelWriter and never saved it.
    overallRankings.to_excel("data/rankings.xlsx", sheet_name="Rankings")
    overallRankings.to_pickle("data/rankings.pkl")

    if platform == "darwin":
        # os.system reports failure through its return value, not an exception.
        if os.system("open -a 'Microsoft Excel.app' 'data/rankings.xlsx'") != 0:
            print("Failed to open file. Please open it on your own.")
    elif platform == "win32":
        # `open -a` is a macOS command; use the Windows file association.
        try:
            os.startfile(os.path.abspath("data/rankings.xlsx"))
        except OSError:
            print("Failed to open file. Please open it on your own.")
def dashboard(request, team_number):
    """Render ``dashboard.html`` for one team.

    The template context always contains ``team`` plus JSON-encoded lists
    scraped from the team's vexdb.io rankings page (``js_oprs``,
    ``js_ranks``, ``js_mscores``). When the team is also found in
    ``ResearchTeams``, ``research`` and its ``matches`` (sorted by the
    numeric part of the match number) are added.

    Args:
        request: The incoming HTTP request, passed through to ``render``.
        team_number: Team identifier used for the model lookups and the
            vexdb.io URL.

    Returns:
        The response produced by ``render``.
    """
    team = Teams.objects.get(name=team_number)
    data = {
        'team': team,
    }

    try:
        research_team = ResearchTeams.objects.get(name=team_number)
        # Materialise the matches once so the instances that receive `.order`
        # are the ones being sorted (the original relied on queryset result
        # caching for this).
        team_matches = list(chain(
            Matches.objects.filter(red1=team_number),
            Matches.objects.filter(red2=team_number),
            Matches.objects.filter(blue1=team_number),
            Matches.objects.filter(blue2=team_number),
        ))
        for match in team_matches:
            match.order = int(match.number.replace("Q", ""))
        data['matches'] = sorted(team_matches, key=attrgetter('order'))
        data['research'] = research_team
    except (ResearchTeams.DoesNotExist,
            ResearchTeams.MultipleObjectsReturned,
            AttributeError, ValueError):
        # Team is not in the research division, or a match number could not
        # be parsed: render the page without the match list, as before.
        pass

    scores = Raschietto.from_url(
        "https://vexdb.io/teams/view/{}?t=rankings".format(team_number))

    def column_json(selector, cast):
        # The first matched cell is the column header.
        cells = Matcher(selector)(scores, multiple=True) or []
        return json.dumps([cast(cell) for cell in cells[1:]])

    data["js_oprs"] = column_json('.opr', float)
    data["js_ranks"] = column_json('.rank', int)
    data["js_mscores"] = column_json('.max_score', int)
    return render(request, 'dashboard.html', data)
def predictions(division):
    """Scrape vexdb.io statistics for every team listed in a division sheet.

    Team numbers are read from the ``division`` worksheet of the
    ``Bot_Predictions`` spreadsheet (every sixth cell of each row, starting
    with the first; the first one collected is treated as the header). For
    each team a per-event table is pickled to ``divisiondata/<team>.pkl``,
    and a summary of each team's first data row is pickled to
    ``divisiondata/teamList.pkl``.

    Args:
        division: Title of the worksheet to read team numbers from.

    Raises:
        ValueError: If a scraped rank, max-score or OPR cell is not numeric.
    """
    gc = pygsheets.authorize(service_file='service_creds.json')
    sh = gc.open('Bot_Predictions')
    output = gc.open('Engineering Division')
    wks = sh.worksheet_by_title(division)

    teams = []
    for row in wks:
        # enumerate gives the real cell position; row.index(cell) returned
        # the first equal cell and so mis-indexed duplicate or empty cells.
        for position, cell in enumerate(row):
            if position % 6 == 0:
                teams.append(cell)
    teamNums = teams[1:]  # first collected cell is the header

    def cells_for(page, selector):
        return list(Matcher(selector)(page, multiple=True) or [])

    def latest(cells):
        # Index 0 is the header, so index 1 is the first data row.
        return cells[1] if len(cells) > 1 else "New Team"

    allEvents = []
    allRanks = []
    allWLTs = []
    allWPSPs = []
    allMAXSCORES = []
    allOPRS = []
    bar = Bar('Collecting Data', max=len(teamNums))
    for teamNumber in teamNums:
        scores = Raschietto.from_url(
            "https://vexdb.io/teams/view/{}?t=rankings".format(teamNumber))

        event_cells = cells_for(scores, '.event')
        allEvents.append(latest(event_cells))
        eventsList = event_cells[1:]

        rank_cells = cells_for(scores, '.rank')
        allRanks.append(latest(rank_cells))
        ranksList = [int(cell) for cell in rank_cells[1:]]

        wlt_cells = cells_for(scores, '.wlt')
        allWLTs.append(
            (wlt_cells[1] + " W-L-T") if len(wlt_cells) > 1 else "New Team")
        if "W-L-T" in wlt_cells:
            wlt_cells.remove("W-L-T")
        newWLTList = [cell + " score" for cell in wlt_cells]

        wpsp_cells = cells_for(scores, '.wpsp')
        allWPSPs.append(latest(wpsp_cells))
        WPSPList = wpsp_cells[1:]

        max_score_cells = cells_for(scores, '.max_score')
        allMAXSCORES.append(latest(max_score_cells))
        maxScoreList = [int(cell) for cell in max_score_cells[1:]]

        opr_cells = cells_for(scores, '.opr')
        allOPRS.append(latest(opr_cells))
        OPRList = [float(cell) for cell in opr_cells[1:]]

        # The original also built a "Mean" row and called
        # DataFrame.append() without keeping the result, so it never reached
        # the pickle; that dead code is dropped.
        pdTeamStats = pd.DataFrame({
            "Events": pd.Series(data=eventsList),
            "Ranks": pd.Series(data=ranksList),
            "WLT": pd.Series(data=newWLTList),
            "WPSP": pd.Series(data=WPSPList),
            "Max Score": pd.Series(data=maxScoreList),
            "OPR": pd.Series(data=OPRList)
        })
        pdTeamStats.to_pickle("divisiondata/" + teamNumber + ".pkl")
        bar.next()

    pdWLT = pd.Series(data=allWLTs)
    print(pdWLT)
    teamList = pd.DataFrame({
        "Team Number: ": pd.Series(data=teamNums),
        "Most Recent Event: ": pd.Series(data=allEvents),
        "Rank: ": pd.Series(data=allRanks),
        "W-L-T: ": pdWLT,
        "WPSP: ": pd.Series(data=allWPSPs),
        "Max Score: ": pd.Series(data=allMAXSCORES),
        "OPR: ": pd.Series(data=allOPRS)
    })
    teamList = teamList[[
        "Team Number: ", "Most Recent Event: ", "Rank: ", "W-L-T: ",
        "WPSP: ", "Max Score: ", "OPR: "
    ]]
    teamList.to_pickle("divisiondata/teamList.pkl")
    bar.finish()
    print("Completed")
    print(output.url)
def update(url):
    """Rebuild the stored team data for a Robot Events tournament page.

    Writes ``url`` to ``data/urls.txt`` (replacing its contents), reads the
    team table from the page (four cells per team: number, name,
    organization, location) and scrapes each team's vexdb.io rankings page.
    Results go to ``data/data.xlsx`` (one sheet per team plus ``teamlist``)
    and to pickles under ``team_dataframes/``. On macOS and Windows the
    spreadsheet is then opened.

    Args:
        url: Robot Events tournament page URL.

    Raises:
        ValueError: If a scraped rank, max-score or OPR cell is not numeric.
    """
    page = Raschietto.from_url(url)
    with open('data/urls.txt', 'w+') as filehandle:
        filehandle.write(url)

    cells = list(Matcher('.table-bordered>tbody>tr>td')(page, multiple=True) or [])
    teamNums = []
    teamNames = []
    organizations = []
    locations = []
    # Walk complete groups of four cells; a trailing partial group is ignored.
    for start in range(0, len(cells) - 3, 4):
        number, name, organization, location = cells[start:start + 4]
        teamNums.append(number)
        teamNames.append(name)
        organizations.append(organization)
        locations.append(location.replace('\n', ' '))

    def cells_for(team_page, selector):
        return list(Matcher(selector)(team_page, multiple=True) or [])

    def latest(column_cells):
        # Index 0 is the header, so index 1 is the first data row.
        return column_cells[1] if len(column_cells) > 1 else "New Team"

    allEvents = []
    allRanks = []
    allWLTs = []
    allWPSPs = []
    allMAXSCORES = []
    allOPRS = []
    bar = Bar('Collecting Data', max=len(teamNums))

    workbook_path = "data/data.xlsx"
    # The context manager saves and closes the workbook. The original saved
    # an empty openpyxl Workbook to the same path and then overwrote it.
    with pd.ExcelWriter(workbook_path) as writer:
        for teamNumber in teamNums:
            scores = Raschietto.from_url(
                "https://vexdb.io/teams/view/{}?t=rankings".format(teamNumber))

            event_cells = cells_for(scores, '.event')
            allEvents.append(latest(event_cells))

            rank_cells = cells_for(scores, '.rank')
            allRanks.append(latest(rank_cells))

            wlt_cells = cells_for(scores, '.wlt')
            allWLTs.append(latest(wlt_cells))

            wpsp_cells = cells_for(scores, '.wpsp')
            allWPSPs.append(latest(wpsp_cells))

            max_score_cells = cells_for(scores, '.max_score')
            allMAXSCORES.append(latest(max_score_cells))

            opr_cells = cells_for(scores, '.opr')
            allOPRS.append(latest(opr_cells))

            pdTeamStats = pd.DataFrame({
                "Events": pd.Series(data=event_cells[1:]),
                "Ranks": pd.Series(data=[int(c) for c in rank_cells[1:]]),
                "WLT": pd.Series(data=wlt_cells[1:]),
                "WPSP": pd.Series(data=wpsp_cells[1:]),
                "Max Score": pd.Series(
                    data=[int(c) for c in max_score_cells[1:]]),
                "OPR": pd.Series(data=[float(c) for c in opr_cells[1:]])
            })
            pdTeamStats.to_excel(writer, sheet_name=teamNumber)
            pdTeamStats.to_pickle("team_dataframes/" + teamNumber + ".pkl")
            bar.next()

        teamList = pd.DataFrame({
            "Team Number: ": pd.Series(data=teamNums),
            "Team Name: ": pd.Series(data=teamNames),
            "Organization: ": pd.Series(data=organizations),
            "Location: ": pd.Series(data=locations),
            "Most Recent Event: ": pd.Series(data=allEvents),
            "Rank: ": pd.Series(data=allRanks),
            "W-L-T: ": pd.Series(data=allWLTs),
            "WPSP: ": pd.Series(data=allWPSPs),
            "Max Score: ": pd.Series(data=allMAXSCORES),
            "OPR: ": pd.Series(data=allOPRS)
        })
        teamList = teamList[[
            "Team Number: ", "Team Name: ", "Organization: ", "Location: ",
            "Most Recent Event: ", "Rank: ", "W-L-T: ", "WPSP: ",
            "Max Score: ", "OPR: "
        ]]
        teamList.to_excel(writer, sheet_name='teamlist')
    teamList.to_pickle("team_dataframes/teamList.pkl")

    wb = load_workbook(workbook_path)
    sheet = wb['teamlist']
    # Size text columns from their longest entry. len(max(values)) measured
    # the lexicographically greatest string, not the longest one.
    scaled_columns = [
        ('B', teamNums, 1.2),
        ('C', teamNames, 1.2),
        ('D', organizations, 1.2),
        ('E', locations, 0.8),
        ('H', allWLTs, 1.2),
        ('I', allWPSPs, 1.2),
        ('K', allOPRS, 1.2),
    ]
    for letter, values, factor in scaled_columns:
        if values:
            longest = max(len(str(value)) for value in values)
            sheet.column_dimensions[letter].width = longest * factor
    sheet.column_dimensions['F'].width = 40
    sheet.column_dimensions['G'].width = 6
    sheet.column_dimensions['J'].width = 10
    wb.save(workbook_path)
    bar.finish()
    print("Completed")

    if platform == "darwin":
        # os.system reports failure through its return value, not an exception.
        if os.system("open -a 'Microsoft Excel.app' 'data/data.xlsx'") != 0:
            print("Failed to open file. Please open it on your own.")
    elif platform == "win32":
        # `open -a` is a macOS command; use the Windows file association.
        try:
            os.startfile(os.path.abspath(workbook_path))
        except OSError:
            print("Failed to open file. Please open it on your own.")
from raschietto import Raschietto, Matcher import pandas as pd from collect import collect import random import pickle from progress.bar import Bar from multiprocessing import Process teamPage = Raschietto.from_url("https://vexdb.io/teams?p=1") maxPageNum = Matcher(".pagination > li") maxPageNum = maxPageNum(teamPage, multiple=True) maxPageNum = int(maxPageNum[len(maxPageNum) - 2]) start = random.randint(1, ((maxPageNum * 50))) try: with open('teams.pkl', 'rb') as f: teams = pickle.load(f) # print(len(teams)) except: print("No team list exists. Collecting data now.") bar = Bar('Collecting All Team #s', max= (maxPageNum * 50)) teams = [] teamList = [] orgs = [] for i in range(1, maxPageNum): teamPage = Raschietto.from_url("https://vexdb.io/teams?p={}".format(i)) teamPointer = Matcher('.number') teamList = teamPointer(teamPage, multiple=True)
teamList = df["Team Number: "].tolist() # page = Raschietto.from_url("https://vexdb.io/events/view/RE-VRC-17-4462") # teamNums = Matcher('.number') # teamList = teamNums(page, multiple=True) teamStats = [] bar = Bar('Collecting Team Data', max=len(teamList)) for team in teamList: teamStats.append(collect(team)) bar.next() bar.finish() page = Raschietto.from_url( "https://vexdb.io/events/view/RE-VRC-17-4462?t=results") results = Matcher('.result-box') results = results(page, multiple=True) blueScores = [] blueAlliance1 = [] blueAlliance2 = [] redScores = [] redAlliance1 = [] redAlliance2 = [] for i in range(len(results)): if (i + 1) % 6 == 0: blueScores.append(results[i]) redScores.append(results[i - 1]) blueAlliance1.append(results[i - 3])
def model(teamName):
    """Print OPR-difference / outcome pairs for a team's played matches.

    Finds the team's event links on vexdb.io and scrapes each event's
    result boxes, which come in groups of six: red1, red2, blue1, blue2,
    red score, blue score. For every match the team took part in, it prints
    ``[opr_difference, won]``, where ``opr_difference`` is the winning
    alliance's summed OPR minus the losing alliance's, and ``won`` is 1
    when ``teamName`` was on the winning side. Matches with non-numeric
    scores are skipped. Ties are credited to the red alliance.

    Args:
        teamName: Team number as it appears in the result boxes.
    """
    url = "https://vexdb.io/teams/view/{}?t=results".format(teamName)
    page = Raschietto.from_url(url)
    link_matcher = Matcher.link(
        "#table-content > div > table > thead > tr > td > a",
        startswith="https://vexdb.io")
    teamEvents = [
        "{}?t=results".format(link)
        for link in link_matcher(page, multiple=True)
    ]
    print(teamName)
    print("Found events")
    print(teamEvents)

    for event in teamEvents:
        # Load the event page before reading its header; the original read
        # the header from the previously loaded page.
        page = Raschietto.from_url(event)
        header = Matcher(".page-header")(page, multiple=False)
        print("Scraping event: {}".format(header))

        results = list(Matcher('.result-box')(page, multiple=True) or [])
        matches = []
        for start in range(0, len(results) - 5, 6):
            red1, red2, blue1, blue2, red_raw, blue_raw = results[start:start + 6]
            if teamName not in (red1, red2, blue1, blue2):
                continue
            try:
                # Compare as numbers: as strings, "9" > "10".
                blue_score = int(blue_raw)
                red_score = int(red_raw)
            except (TypeError, ValueError):
                continue  # score is not numeric

            if teamName in (blue1, blue2):
                blue_won = blue_score > red_score
                winloss = 1 if blue_won else 0
            else:
                blue_won = not (red_score >= blue_score)
                winloss = 0 if blue_won else 1
            winner = 'B' if blue_won else 'R'
            matches.append([blue1, blue2, red1, red2,
                            blue_score, red_score, winloss, winner])

        # NOTE(review): the collapsed source does not show whether this loop
        # sat inside or after the event loop; per-event is assumed.
        for match in matches:
            b1 = collect(match[0])[1]
            b2 = collect(match[1])[1]
            r1 = collect(match[2])[1]
            r2 = collect(match[3])[1]
            # The original summed b1 twice and never used b2.
            blue_opr = b1 + b2
            red_opr = r1 + r2
            if match[7] == "B":
                oprd = [blue_opr - red_opr, match[6]]
            else:
                oprd = [red_opr - blue_opr, match[6]]
            print(oprd)
for i in teams[team]: if i > 239: del teams[team] continue except: pass try: if np.isnan(teams[team][0]): del teams[team] continue except: pass try: teamPage = Raschietto.from_url( "https://vexdb.io/teams/view/{}?t=rankings".format(team)) season = Matcher(".text-center") season = season(teamPage, multiple=True) if season[0] != "In The Zone": del teams[team] print("FOUND ONE!") except: pass bar.finish() with open('sri/data.pkl', 'wb') as handle: pickle.dump(teams, handle, protocol=pickle.HIGHEST_PROTOCOL) # print(teams) exit() # sns.set(color_codes=True) # # with open('sri/data.pkl', 'rb') as handle:
def collect(teamNum):
    """Scrape a team's vexdb.io rankings table and pickle it.

    Reads the ``.event``, ``.rank``, ``.wlt``, ``.wpsp``, ``.max_score``
    and ``.opr`` cells, drops the first matched cell of each (the table
    header) and stores the columns as a DataFrame in
    ``team_dataframes/<teamNum>.pkl``. When a text column has no cells at
    all, it holds the single placeholder ``"New Team"``.

    Args:
        teamNum: Team identifier used in the URL and the pickle file name.

    Raises:
        ValueError: If a rank cell is not an integer.
    """
    scores = Raschietto.from_url(
        "https://vexdb.io/teams/view/{}?t=rankings".format(teamNum))

    def rows_for(selector, placeholder=None):
        cells = list(Matcher(selector)(scores, multiple=True) or [])
        if cells:
            return cells[1:]  # drop the header cell
        return [] if placeholder is None else [placeholder]

    def best_effort(rows, cast):
        # Keep the raw cells when any of them fails to parse.
        try:
            return [cast(cell) for cell in rows]
        except (TypeError, ValueError):
            return rows

    eventsList = rows_for('.event', "New Team")
    ranksList = [int(cell) for cell in rows_for('.rank')]
    wltList = rows_for('.wlt', "New Team")
    WPSPList = rows_for('.wpsp', "New Team")
    maxScoreList = best_effort(rows_for('.max_score'), int)
    OPRList = best_effort(rows_for('.opr'), float)

    pdTeamStats = pd.DataFrame({
        "Events": pd.Series(data=eventsList),
        "Ranks": pd.Series(data=ranksList),
        "WLT": pd.Series(data=wltList),
        "WPSP": pd.Series(data=WPSPList),
        "Max Score": pd.Series(data=maxScoreList),
        "OPR": pd.Series(data=OPRList)
    })
    # format() accepts a non-string team number, as the URL above does.
    pdTeamStats.to_pickle("team_dataframes/{}.pkl".format(teamNum))