def test_write_h5(hdf5_tempfile):
    """Writing tabular data to an HDF5 target must raise NotImplementedError."""
    rows = [
        ["1", "A towel,", "1.0"],
        ["42", " it says, ", "2.0"],
        ["1337", "is about the most ", "-1"],
        ["0", "massively useful thing ", "123"],
        ["-2", "an interstellar hitchhiker can have.\n", "3"],
    ]
    with pytest.raises(NotImplementedError):
        write(hdf5_tempfile, rows)
def test_write_csv(csv_tempfile):
    """Round-trip a small table through the CSV writer and reader."""
    line_sep = "\n"
    table = [
        ["1", "A towel,", "1.0"],
        ["42", " it says, ", "2.0"],
        ["1337", "is about the most ", "-1"],
        ["0", "massively useful thing ", "123"],
        ["-2", "an interstellar hitchhiker can have.\n", "3"],
    ]
    write(csv_tempfile, table, newline=line_sep)
    # Reading back with the same newline convention must reproduce the input.
    assert read(csv_tempfile, newline=line_sep) == table
def test_write_pickle_protocol(pickle_tempfile):
    """Round-trip a nested structure through pickle at protocol 0."""
    payload = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    write(pickle_tempfile, payload, protocol=0)
    assert read(pickle_tempfile) == payload
def test_write_json(json_tempfile):
    """Round-trip a nested structure through the JSON writer and reader."""
    payload = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    write(json_tempfile, payload)
    assert read(json_tempfile) == payload
def _scrape_game_ids(url):
    """Load one ESPN scoreboard page in a fresh Chrome session and return
    the game IDs parsed from its mobile scoreboard links."""
    # One driver per page mirrors the original behaviour, which also
    # launched (and closed) a new Chrome instance for every page.
    # NOTE: raw string — the original 'C:\webdrivers\...' only worked because
    # \w is not a recognised escape (it is a DeprecationWarning in Python 3).
    browser = webdriver.Chrome(r'C:\webdrivers\chromedriver.exe')
    browser.set_window_size(1120, 550)
    browser.get(url)
    #WebDriverWait(browser, 3)
    source = browser.page_source
    browser.close()
    soup = BS(source, 'html5lib')
    links = soup.find_all('a', class_="mobileScoreboardLink")
    return parse([str(link) for link in links])


def getGameID():
    """Scrape 2017 game IDs for regular-season weeks 1-15 plus the bowl
    games, then dump the {"Week N"/"Bowls": ids} mapping to gameIDS.json."""
    #http://www.espn.com/college-football/scoreboard/_/group/80/year/2017/seasontype/2/week/1
    urlFirst = "http://www.espn.com/college-football/scoreboard/_/group/80/year/2017/seasontype/2/week/"
    gameIDS = {}
    for week in range(1, 16):
        gameIDS["Week " + str(week)] = _scrape_game_ids(urlFirst + str(week))
    # seasontype/3 is the postseason (bowl) scoreboard.
    gameIDS["Bowls"] = _scrape_game_ids(
        "http://www.espn.com/college-football/scoreboard/_/group/80/year/2017/seasontype/3/week/1"
    )
    io.write('gameIDS.json', gameIDS)
def test_write_json_params(json_tempfile):
    """JSON round-trip must survive custom serialisation parameters."""
    payload = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    write(
        json_tempfile,
        payload,
        indent=4,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    )
    assert read(json_tempfile) == payload
def test_write_jsonl(jsonl_tempfile):
    """Round-trip a list of heterogeneous objects through the JSONL writer."""
    records = [
        {"some": "thing"},
        {"foo": 17, "bar": False, "quux": True},
        {"may": {"include": "nested", "objects": ["and", "arrays"]}},
    ]
    write(jsonl_tempfile, records)
    assert read(jsonl_tempfile) == records
# NOTE(review): fragment — this chunk begins with an orphan `except:` whose
# `try` lies outside this view and ends inside another `try:` suite, so the
# original nesting cannot be reconstructed safely; left byte-identical.
# Issues to fix once the enclosing structure is visible: the bare `except:`
# handlers swallow every error, one of them drops into an interactive
# debugger (`pdb.set_trace()`), and the `i[0][2][5]`-style index chains are
# undocumented magic positions — presumably columns of a scraped table
# (TODO: confirm against the producer of this data).
except: temp.append(" ") temp.append(" ") try: temp.append(i[0][2][5]) except: print(" ") temp.append(i[0][8][1]) try: temp.append(i[0][6][1]) except: temp.append(" ") winnersFinal.append(temp) except: pdb.set_trace() mi.write('gameWinnersTable.csv', winnersFinal) for i in mainDict: temp = [] try: temp.append(mainDict[i]['Loser']) temp.append(i) losersFirst.append(temp) except: pass losersFinal = [] losersFinal.append(header) for i in losersFirst: try: temp = []
import mpu.io as mi
import pdb

mainDict = mi.read('gameParticipationTableDictionary.json')

# Flatten the {game_no: [team_id, ...]} mapping into [game_no, team_id] pairs.
listOfParticipation = []
for game_no in mainDict:
    for team_id in mainDict[game_no]:
        listOfParticipation.append([game_no, team_id])

# (leftover debugging breakpoint removed; was: pdb.set_trace())

# `with` ensures the debug dump is flushed and closed — the original handle
# was never closed.
with open('output.txt', 'w') as f:
    for pair in listOfParticipation:
        print(pair, file=f)

header = ['Game_No', 'Team_ID']
finalList = [header]
finalList.extend(listOfParticipation)
mi.write('team_played_inTable.csv', finalList)
# NOTE(review): fragment — the first statements belong to a function
# (`handle_year`, judging by the later call) whose `def` line is outside this
# view, so the nesting cannot be reconstructed here. Issues to address in the
# real file: (1) `url_to_put_to` is assigned but `requests.get(url)` reads a
# different name — presumably a bug, confirm which URL was intended;
# (2) `exit(1)` makes the "send them to the API" loop below it unreachable
# dead code — looks like a debugging leftover; (3) `[handle_year(x) for x in
# range(2009, 2020)]` builds a throwaway list purely for side effects — a
# plain `for` loop is the idiomatic form.
url_to_put_to = format_endpoint(year) soup = BeautifulSoup(requests.get(url).text, features="html.parser") #get all of the divisions we are interested in #the website uses identical ids for different things, so we will need to deal with that first_layer_divs = soup.find_all("div", attrs={"class": "conference"}) for div in first_layer_divs: #let us package this up in something nice... a small class would be usefull conference_name = div.find("h1").text for x in div.find_all("a"): ids[x.text] = x['href'].split("/")[-2] teams = [team.text for team in div.find_all("a")] for x in teams: #add them to the list of things to post later relationships_to_add.append(Data_holder(year, conference_name, x)) [handle_year(x) for x in range(2009, 2020)] mi.write("team_ids.json", data=ids) exit(1) #go ahead and send them to the API for x in relationships_to_add: url = format_endpoint(x.year) requests.put(url, json={ "conference_name": x.conference_name, "team_name": x.team_name })
#this program will fix the formatting of the player tables (insert pseudo-id's)
import mpu.io as mi
import random

playerTableDict = mi.read('playerTableDict.json')

# Preserve the original side effect (output.txt was opened for writing and
# truncated) while fixing the leak — the handle was never used or closed.
open('output.txt', 'w').close()

count = 10000000  # pseudo-ID sequence start
for team in playerTableDict:
    try:
        for row in playerTableDict[team]:
            # Rows with exactly 9 fields are missing a player ID; insert one
            # at position 8 and advance the sequence.
            if len(row) == 9:
                row.insert(8, str(count))
                count += 1
    except Exception:
        # Deliberate best-effort skip of malformed entries (was a bare
        # `except:`; narrowed so SystemExit/KeyboardInterrupt propagate).
        continue

mi.write('fixedPlayerTableDict.json', playerTableDict)

# Sanity check: print the resulting row lengths.
for team in playerTableDict:
    try:
        for row in playerTableDict[team]:
            print(len(row))
    except Exception:
        continue
import mpu.io as mi

participationDict = mi.read('TeamsAndID.json')
teamDict = mi.read('revisedTeamDict.json')

# For every game, translate each participant reference through teamDict,
# producing {game: [team_entry, ...]}.
tableDict = {
    game: [teamDict[participationDict[game][slot]]
           for slot in participationDict[game]]
    for game in participationDict
}

mi.write('gameParticipationTableDictionary.json', tableDict)
# NOTE(review): fragment — opens with the tail of an `elif` key-renaming
# chain whose `if`/loop header is outside this view; left byte-identical.
# When the full block is visible, the rename chain would be cleaner as a
# single {old_name: new_name} mapping dict. The bare `except: pass` around
# the '2017' roster lookup silently skips teams — presumably teams without a
# 2017 season (TODO: confirm), and would be better as `except KeyError`.
rosterDict['Miami (OH)'] = rosterDict.pop(i) elif i == 'Mississippi': rosterDict['Ole Miss'] = rosterDict.pop(i) elif i == 'North Carolina State': rosterDict['NC State'] = rosterDict.pop(i) elif i == 'UTSA': rosterDict['UT San Antonio'] = rosterDict.pop(i) player_played_in = {} for i in rosterDict: temp = [] try: for j in rosterDict[i]['2017']: temp.append(j) except: pass player_played_in[i] = temp mi.write('player_played_intemp.json', player_played_in) games = mi.read('TeamsAndID.json') player_played_in_table = {} for i in games: temp = [] for j in games[i]: temp.append(player_played_in[games[i][j]]) player_played_in_table[i] = temp mi.write('player_played_in_table.json', player_played_in_table)
# NOTE(review): left byte-identical — the flattened formatting makes the
# loop levels of the two trailing `break` statements unrecoverable, and
# rewriting them at the wrong depth would change how much of the crawl runs.
# They look like debugging leftovers that cut the crawl short after the
# first match (TODO: confirm and remove). Also: `for x in [y for y in
# range(2009, 2020)]` builds a pointless throwaway list — iterate
# `range(2009, 2020)` directly.
if __name__ == '__main__': #example for endpoint "http://localhost:3000/players/team?school=Clemson&year=2019" #i'm going to establish a pho list of statistics to see what is required by the database schema and what needs to be added for x in [y for y in range(2009, 2020)]: conferences = get_conferences(x) #now that we have the conferences, we need to get the teams for conf_data in conferences: name = conf_data['name'] year = conf_data['year'] teams = get_teams_by_conference(name, year) for team_data in teams['teams']: #now we need to get the team_id, which is stored locally in the script team_id = teams_id[team_data['name']] team_name = team_data['name'] team_year = team_data['year'] result = get_players_by_team_name_and_year( team_name, team_year).json() for player_data in result: if (check_if_player_id_exists(team_name, team_year, player_data['player_id'])): player_source = get_player_home_index_page( team_id, player_data['player_id']) parse_player_home_index_page(player_source) break break mio.write('fields_for_schema.json', fields_for_schema)
# NOTE(review): fragment — the leading `if` uses a loop variable `i` and
# `listOfteams` defined outside this view, so its indentation/ownership
# cannot be recovered; left byte-identical. Two issues for the real file:
# (1) the `for i in revisedDict:` loop calls `revisedDict.pop(i)` while
# iterating the dict — in Python 3 mutating a dict during iteration raises
# RuntimeError as soon as a rename actually fires; iterate `list(revisedDict)`
# instead. (2) The long elif chain is a {old: new} rename table in disguise —
# a mapping dict would shrink it to three lines.
if (i not in listOfteams): listOfFlags.append(i) revisedDict = mi.read('teamDict.json') #pdb.set_trace() for i in revisedDict: if (i in listOfFlags): print("Checking", i) if (i == "Connecticut"): revisedDict["UConn"] = revisedDict.pop(i) elif (i == "Florida International"): revisedDict["Florida Intl."] = revisedDict.pop(i) elif (i == "Louisiana-Lafayette"): revisedDict["Louisiana"] = revisedDict.pop(i) elif (i == "Louisiana-Monroe"): revisedDict["Louisiana Monroe"] = revisedDict.pop(i) elif (i == "Massachusetts"): revisedDict["UMass"] = revisedDict.pop(i) elif (i == "Miami (Florida)"): revisedDict["Miami"] = revisedDict.pop(i) elif (i == "Miami (Ohio)"): revisedDict["Miami (OH)"] = revisedDict.pop(i) elif (i == "Mississippi"): revisedDict["Ole Miss"] = revisedDict.pop(i) elif (i == "North Carolina State"): revisedDict["NC State"] = revisedDict.pop(i) elif (i == "UTSA"): revisedDict["UT San Antonio"] = revisedDict.pop(i) mi.write('revisedTeamDict.json', revisedDict) #pdb.set_trace()
# NOTE(review): left byte-identical — the try/except nesting here is only
# partially recoverable from the flattened formatting, so no restyle is
# attempted. Real bug to fix in the original file: `WinnerData`/`LoserData`
# are never reset between iterations, so when `GSD.getTeam` raises for game
# N, the PREVIOUS game's data is still bound and gets inserted and stored
# under game N's ID by the later try blocks. Initialise both to None at the
# top of each iteration and skip the insert when unset. The bare `except:`
# handlers should also be narrowed (`except Exception` at minimum).
#iterating through the dictionary gameTableData = {} for i in dictInfo: #pdb.set_trace() source = getSource(i) try: WinnerData = GSD.getTeam(source, dictInfo[i]['Winner']) except: print("game", i, "fail") try: LoserData = GSD.getTeam(source, dictInfo[i]['Loser']) except: print("game", i, "fail") tempDict = {} try: WinnerData.insert(0, dictInfo[i]['Winner']) stuff = WinnerData tempDict['Winner'] = stuff except: pass try: LoserData.insert(0, dictInfo[i]['Loser']) stuff = LoserData tempDict['Loser'] = stuff except: pass gameTableData[i] = tempDict mi.write('gameTableData.json', gameTableData)
# NOTE(review): fragment — begins with statements from a nested `try` whose
# structure is outside this view (two orphan `except: pdb.set_trace()`
# debugger traps follow), and ends inside an unfinished
# `print('VALUES(',` call; left byte-identical. The `for i in newDict: pass`
# loop is dead code. The INSERT statements are built by string concatenation
# from scraped values — if these are ever executed against a database, use
# parameterised queries instead.
newList.append(fourth) thingToReplace[0] = newList tempTwoDict[temp] = thingToReplace except: pdb.set_trace() except: pdb.set_trace() tempOneDict[j] = tempTwoDict newDict[i] = tempOneDict for i in newDict: pass mi.write('testingDict.json', newDict) testingOutput = open('newTest.txt', 'w') for i in newDict: for j in newDict[i]: for k in newDict[i][j]: #print(k, file = testingOutput) print("INSERT INTO", k, "(", ",".join(newDict[i][j][k][0]), ",", "PlayerID", ")", file=testingOutput) print('VALUES(',
# NOTE(review): fragment — the first half (ending at `return [tableHeaders,
# tableDatas]`) is the tail of a function whose `def` line is outside this
# view, so it is left byte-identical. `tableRows[-2]` takes the
# second-to-last row as the data row — presumably skipping a footer row;
# TODO confirm against the scraped table layout. The `else: pass` branch on
# the "Liberty" check is a no-op and could be dropped; `getUrls`/`getSoup`/
# `getTableHeaders` are project helpers defined elsewhere.
tableHeaderSoup = BS(str(tableRows[0]), 'lxml') tableDataSoup = BS(str(tableRows[-2]), 'lxml') tableHeaderData = tableHeaderSoup.find_all('th') tableHeaders = [] for i in tableHeaderData: tableHeaders.append(i.text) tableData = tableDataSoup.find_all('td') tableDatas = [] for i in tableData: tableDatas.append(i.text) return [tableHeaders, tableDatas] myDict = mi.read('teamDict.json') finalList = {} for i in myDict: if (i != "Liberty"): temp = getUrls(myDict[i]) tempDict = {} for j in temp: soup = getSoup(temp[j]) element = getTableHeaders(soup) tempDict[j] = element finalList[i] = tempDict else: pass mi.write('finalTeamTableDict.json', finalList)
# NOTE(review): fragment — ends with an unclosed `yORn = getUserInput(` call
# whose argument continues past this view, so it is left byte-identical.
# The two yes/no branches duplicate the same final `print` — hoist it below
# the if/elif once the block is editable. `mpuYesOrNo`, `output`, `f`,
# `source` and `testGraph` come from earlier in the original script.
#initialize edges for Bellman-Ford Algorithm edges = initializeEdges(testGraph) outputFord = bellmanFord(testGraph, edges, source) # outputDistances("Bellman-Ford Algorithm", outputFord['Distances'], f, source) outputPathways("Bellman-Ford Algorithm", outputFord['Previous'], f, source) yORn = getUserInput( "Would you like the 'distance' and 'previous' dictionaries written to seperate .json files? (yes or no): " ) if (yORn == 'yes'): if (mpuYesOrNo == 'yes'): mi.write('outPutBellmanFord.json', outputFord) mi.write('outputForDijkstra.json', output) print( 'Dijkstra Results: outputForDijkstra.json\nBellman-Ford Results: outPutBellmanFord.json' ) elif (mpuYesOrNo == 'no'): with open('outPutBellmanFord.json', 'w') as wj: json.dump(outputFord, wj, indent=10) with open('outputForDijkstra.json', 'w') as wj: json.dump(output, wj, indent=10) print( 'Dijkstra Results: outputForDijkstra.json\nBellman-Ford Results: outPutBellmanFord.json' ) yORn = getUserInput(
PlayerList = []
# Header row — column names kept exactly as the downstream CSV expects
# (including the original 'heigh' spelling).
PlayerList.append([
    'Number', 'Last Name', 'First Name', 'Position', 'Year', 'heigh',
    'weight', 'hometown', 'Highschool', 'Player_ID', 'Team_ID'
])
#pdb.set_trace()
# (leftover active debugging breakpoint removed; was: pdb.set_trace())
for team in myDict:
    try:
        for row in myDict[team]:
            # Split the combined "Last, First" field into two columns.
            names = row[1].split(",")
            del row[1]
            row.insert(1, names[1])
            row.insert(1, names[0])
            # Convert height "6-2" to "6'2".
            heigh = row[5].split("-")
            del row[5]
            row.insert(5, "'".join(heigh))
            PlayerList.append(row)
    except Exception:
        # will skip liberty (not a team in 2017, but existed on the
        # wikepedia scrape); narrowed from a bare `except:` so
        # KeyboardInterrupt/SystemExit still propagate.
        pass
# The handle was opened but never used or closed; close it immediately while
# preserving the original side effect of truncating output.txt.
open('output.txt', 'w').close()
mi.write('PlayerTable.csv', PlayerList)
import mpu.io as mi

myDict = mi.read('gameTabelData.json')

listOfGames = [[
    'game_ID', 'Team one', 'Team two', 'Q1', 'Q2', 'Q3', 'Q4', 'OT',
    'Matchup', 'Game Type'
]]
for game_id in myDict:
    row = [game_id]
    # Entries 1 and 2 hold the two teams' score columns.
    row.extend(myDict[game_id][1])
    row.extend(myDict[game_id][2])
    # Pad games without an OT column so every row lines up with the header.
    if len(row) != 8:
        row.append(" ")
    if game_id == '400941816':
        # This specific game needs one extra pad column — presumably a
        # double-OT layout; TODO confirm against the scraped data.
        row.append(" ")
    row.append(myDict[game_id][3])
    row.append(myDict[game_id][4])
    listOfGames.append(row)
# The handle was opened but never used or closed; close it immediately while
# preserving the original side effect of truncating output.txt.
open('output.txt', 'w').close()
mi.write('gameTable.csv', listOfGames)
# NOTE(review): left byte-identical — the flattened formatting does not show
# whether `counter += 1` sits inside `if (j in teams):` (count only matched
# teams) or one level up in the `for j` loop (count every entry); the two
# readings assign Winner/Loser differently when `temp` has extra entries, so
# no restyle is attempted. Also: `pdb.set_trace()` is a leftover debugger
# breakpoint, and `teams`/`f`/`GameTable`/`TeamsAndID` are defined earlier in
# the original script.
print(teams, file=f) pdb.set_trace() #Loop Through the list and get the data for i in GameTable: temp = GameTable[i][1] #see if the team exists in the database. #Temporary dictionary to hold winner and loser tempDict = {} #teams are organized in a way such that the winner is the first and the loser is the second #so the counter will be used to determine position when we get a hit on the if (is in) statement counter = 0 # #loops through the list of teams for j in temp: if (j in teams): if (counter == 0): tempDict['Winner'] = j elif (counter == 1): tempDict['Loser'] = j counter += 1 #saves the winner and the loser dictionary to the overall dictionary under the game ID #used to find statistics. TeamsAndID[i] = tempDict #save the created dictionary for revision mi.write('TeamsAndID.json', TeamsAndID)
import mpu.io as mi
import pdb

teamDict = mi.read('teamDict.json')
teamStatsDict = mi.read('finalTeamTableDict.json')

# (two leftover pdb.set_trace() breakpoints removed — they halted the script)

# Header comes from Air Force's rushing table (columns 4 onward), with the
# column names sanitised in one pass instead of three full-list copies:
# dots dropped, '/' -> '_', 'Int' -> 'Interception'.
passingList = [
    col.replace(".", "").replace("/", "_").replace("Int", "Interception")
    for col in teamStatsDict["Air Force"]["rushing"][0][4:]
]
newPassingList = [list(passingList)]
newPassingList[0].append("TEAM_ID")

# One row per team: its rushing stats (columns 4+) plus the team's ID.
for team in teamStatsDict:
    row = teamStatsDict[team]["rushing"][1][4:]
    row.append(teamDict[team])
    newPassingList.append(row)

mi.write('teamRushingStats.csv', newPassingList)
# Drop citation/footnote cells (those starting with '[') from each table.
# BUG FIX: the original called `i.remove(j)` while iterating `i`, which
# skips the element immediately following every removal, so consecutive
# '['-cells survived. Rebuild the list in place via slice assignment instead.
for table in listOfTables:
    table[:] = [cell for cell in table if cell[0] != '[']

# Named column positions within each table row.
teamName = 0
mascot = 1
city = 2
state = 3
conference = 4
ID = -1

# Reorder each row as [ID, name, mascot, city, state, conference] for the CSV.
listToCSV = []
for table in listOfTables:
    listToCSV.append([
        table[ID],
        table[teamName],
        table[mascot],
        table[city],
        table[state],
        table[conference],
    ])
mi.write('TeamTable.csv', listToCSV)
import getSources
import mpu.io as mi
import getTeamNanes
import getBowlGame
import cancelChecker as CC
import getScore

dictionary = mi.read('gameIDS.json')
IDS = []
gameDataDict = {}

# Walk every week's game IDs; skip cancelled games, tag bowl games with
# their bowl name, and collect the parsed score data per game ID.
for week, game_ids in dictionary.items():
    for game_id in game_ids:
        page = getSources.GetSource(game_id)
        # guard clause: only games the cancel-checker reports as playable
        if CC.canceled(page) != 1:
            continue
        is_bowl = 1 if week == 'Bowls' else 0
        bowl_name = getBowlGame.getBowlGame(page) if is_bowl else ""
        team_names = getTeamNanes.parseTeamNames(page)
        important = getScore.getScores(page, team_names, bowl_name, game_id,
                                       is_bowl)
        print(important)
        gameDataDict[str(game_id)] = important

mi.write('gameTabelData.json', gameDataDict)
teamDict = mi.read('teamDict.json')

# Map team ID -> list of that team's 2017 roster player IDs. Teams without a
# 2017 roster entry raise KeyError and are skipped — narrowed from the
# original bare `except:`, which also hid unrelated failures.
newDict = {}
for team in teamDict:
    try:
        newDict[teamDict[team]] = list(rosterDict[team]['2017'])
    except KeyError:
        pass
#mi.write('teamAndPlayerIDS.json', newDict)
#pdb.set_trace()

# Scrape per-player statistics for every team, timing each team's batch.
playerStatisticsDict = {}
for team_id in newDict:
    urls = generateURLs(team_id, newDict[team_id])
    start = time.time()
    elements = scraper.main(urls, newDict[team_id])
    end = time.time()
    print(end - start)  # rough per-team scrape duration in seconds
    playerStatisticsDict[str(team_id)] = elements

mi.write('finalPlayerStatsDict.json', playerStatisticsDict)
from mpu import io as mi
import pdb

mainDict = mi.read('player_played_in_table.json')

# Output starts with the header row, then one [game_no, player_id] pair per
# player appearance.
finalList = [['Game_No', 'Player_id']]
pdb.set_trace()
for game_no, rosters in mainDict.items():
    finalList.extend(
        [game_no, player_id]
        for roster in rosters
        for player_id in roster
    )

mi.write('player_played_inTable.csv', finalList)