def main():
    """Download (unless skipped), extract, and post-process both packs.

    Side effects only: refreshes the `.tmp` scratch directory, fetches the
    resource/behavior pack zips, unpacks them, then runs the harvest and
    sound-stripping steps. Returns None.
    """
    if not SKIP_DOWNLOAD:
        # Start from a clean scratch directory so stale files never leak in.
        if os.path.exists('.tmp'):
            shutil.rmtree('.tmp')
        os.makedirs('.tmp', exist_ok=True)

        print("Downloading files...")
        for url, dest in ((RP_URL, '.tmp/rp.zip'), (BP_URL, '.tmp/bp.zip')):
            download_file(url, dest)

    # Re-extract both packs (even when the download step was skipped) so the
    # working trees always match the zips currently on disk.
    for label, short in (('resource', 'rp'), ('behavior', 'bp')):
        target = '.tmp/' + short
        if os.path.exists(target):
            shutil.rmtree(target)
        print(f'Extracting {label} pack')
        with ZipFile(target + '.zip') as zipf:
            zipf.extractall(target)

    harvest('.tmp/bp', DOWNLOAD_MODE == 'stable')
    strip_sounds('.tmp/rp/sounds/sound_definitions.json')
no_console_display = False # start progress bar for html output files if no_console_display: config_number = len(filenames[0]) config_checked = 0 bar = progressbar.ProgressBar(maxval=config_number, widgets=[ progressbar.Bar(left='[', marker='=', right=']'), # Прогресс progressbar.SimpleProgress(), ]).start() # Creating dictionary for drawing graph dict_for_drawing_plot = {} # creds harvester if (args.args.dump_creds): harvester.harvest(filenames[0]) # processing configs one by one for filename in filenames[0]: # Define config file name config_name = filename.partition(config_directory)[2] # Create output html file full path if needed if html_directory: html_file = html_directory + config_name + '.html' # parsing configs parsing.parseconfigs(filename, check_disabled) # getting parse output
def rank(
    year,
    alpha=0.85,
    iters=3500,
    print_rankings=False,
    plot_rankings=False,
    serialize_results=True,
    first_year=True,
):
    """
    Ranks all Division I NCAA Basketball teams in a given year using PageRank.

    PARAMETERS
    year --> The year that the NCAA championship game takes place.
             The 2019-2020 season would correspond to year=2020.
    alpha --> A value between 0 and 1 that is a measure of randomness in
              PageRank meant to model the possibility that a user randomly
              navigates to a page without using a link. alpha=1 would be
              completely deterministic, while alpha=0 would be completely
              random. Google is rumored to use alpha=.85.
    iters --> The number of iterations of PageRank matrix multiplication to
              perform. The default of iters=3500, about 10x the number of
              Division I teams, is generally sufficient for ranking.
    print_rankings --> Should the function print out its rankings to stdout?
                       Defaults to False.
    plot_rankings --> Should the function plot the rankings in sorted order?
                      Defaults to False.
    serialize_results --> Should the function save its rankings and raw
                          PageRank vector to "./predictions/[YEAR]_rankings.p"
                          and "./predictions/[YEAR]_vector.p"? Defaults to True.
    first_year --> If True, start PageRank from a uniform vector; if False,
                   seed it with the previous year's vector (computed
                   recursively with a slightly lower alpha). Defaults to True.

    RETURNS
    The final PageRank column vector (numpy array of shape (num_teams, 1)),
    or None if no summary exists and one could not be created.
    """
    # Try to deserialize the .p summary file; try to create it if it doesn't exist.
    try:
        with open("./summaries/" + str(year) + "/total_summary.p", "rb") as f:
            total_summary = pickle.load(f)
    except FileNotFoundError:
        print("--- WARNING: No summary found for", year, "Trying to create summary...")
        try:
            harvester.harvest(year)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate.
            print("--- ERROR: Could not make summary for", year)
            return
        print("--- SUCCESS: Summary created for", year)
        print("--- Trying to rank again with newly created summary")
        # Propagate the retry's result (was previously discarded) and forward
        # first_year so the caller's intent survives the retry.
        return rank(
            year, alpha, iters, print_rankings, plot_rankings,
            serialize_results, first_year,
        )

    # Ordered list of unique team names, with spaces normalized to hyphens.
    # NOTE(review): the membership/index checks below use the raw (unjoined)
    # names from the summary — this only matches if names contain no spaces;
    # confirm against the harvester's output format.
    teams = list(
        {"-".join(game[GameValues.HOME_TEAM.value].split(" ")) for game in total_summary}
    )
    num_teams = len(teams)

    # Initial PageRank vector: uniform for the first year, otherwise seeded
    # with the previous year's converged vector.
    if first_year:
        vec = np.ones((num_teams, 1))
    else:
        vec = rank(year - 1, alpha - 0.1, serialize_results=False, first_year=True)
        # NOTE(review): if last year's vector is shorter than num_teams the
        # matrix product below will fail; confirm team sets are stable.
        num_teams = max(num_teams, len(vec))

    mat = np.zeros((num_teams, num_teams))
    team_idx = {team: i for i, team in enumerate(teams)}  # O(1) index lookups

    for game in total_summary:
        # We only want to count games where both teams are D1 (in teams list).
        # We choose to only look at games where the first team won so we
        # don't double-count games.
        if (
            game[GameValues.HOME_TEAM.value] in team_idx
            and game[GameValues.AWAY_TEAM.value] in team_idx
            and game[GameValues.WIN_LOSS.value]
        ):
            # Game winner: since we know the first/home team won, we can
            # already assign the win weight for that.
            home_pr_score, away_pr_score = GameWeights.WEIGHTS.value[0], 0.0

            # Effective field goal percentage (higher is better)
            if game[GameValues.HOME_eFGp.value] > game[GameValues.AWAY_eFGp.value]:
                home_pr_score += GameWeights.WEIGHTS.value[1]
            elif game[GameValues.AWAY_eFGp.value] > game[GameValues.HOME_eFGp.value]:
                away_pr_score += GameWeights.WEIGHTS.value[1]

            # Turnover percentage (lower is better)
            if game[GameValues.HOME_TOVp.value] < game[GameValues.AWAY_TOVp.value]:
                home_pr_score += GameWeights.WEIGHTS.value[2]
            elif game[GameValues.AWAY_TOVp.value] < game[GameValues.HOME_TOVp.value]:
                away_pr_score += GameWeights.WEIGHTS.value[2]

            # Offensive rebound percentage (higher is better)
            if game[GameValues.HOME_ORBp.value] > game[GameValues.AWAY_ORBp.value]:
                home_pr_score += GameWeights.WEIGHTS.value[3]
            elif game[GameValues.AWAY_ORBp.value] > game[GameValues.HOME_ORBp.value]:
                away_pr_score += GameWeights.WEIGHTS.value[3]

            # Free throw rate (higher is better)
            if game[GameValues.HOME_FTR.value] > game[GameValues.AWAY_FTR.value]:
                home_pr_score += GameWeights.WEIGHTS.value[4]
            elif game[GameValues.AWAY_FTR.value] > game[GameValues.HOME_FTR.value]:
                away_pr_score += GameWeights.WEIGHTS.value[4]

            # Add weighted score for this game to the matrix for both teams.
            home_idx = team_idx[game[GameValues.HOME_TEAM.value]]
            away_idx = team_idx[game[GameValues.AWAY_TEAM.value]]
            mat[home_idx, away_idx] += home_pr_score
            mat[away_idx, home_idx] += away_pr_score

    # Alter the matrix to take into account our alpha factor
    # (blend in the uniform "random surfer" term).
    mat = (alpha * mat) + (1 - alpha) * np.ones((num_teams, num_teams)) / num_teams

    # Power iteration: repeatedly multiply, renormalizing so the weights
    # keep summing to num_teams.
    for _ in range(iters):
        vec = mat @ vec
        vec *= num_teams / vec.sum()

    # Sort the (ranking, team) pairs into an ascending list of tuples.
    sorted_pairs = sorted((prob[0], team) for team, prob in zip(teams, vec))

    # Print ranking pairs if specified (best team printed last, as rank 1).
    if print_rankings:
        for i, pair in enumerate(sorted_pairs):
            print(num_teams - i, pair)

    # Serialize results if specified
    # TODO: Serialize results as dict?
    if serialize_results:
        # Make the predictions folder if needed.
        outfile1 = f"./predictions/{year}_rankings.p"
        outfile2 = f"./predictions/{year}_vector.p"
        os.makedirs(os.path.dirname(outfile1), exist_ok=True)

        # Every team gets an entry (default 0), then actual scores overwrite.
        serial = {team: 0 for team in teams}
        for score, team in sorted_pairs:
            serial[team] = score

        with open(outfile1, "wb") as f:
            pickle.dump(serial, f)
        with open(outfile2, "wb") as f:
            pickle.dump(vec, f)

    # Plot a histogram of the ranking distribution if specified.
    if plot_rankings:
        s = sorted(vec)
        bins = np.arange(0.0, 3.5, 0.125)
        hist, bins = np.histogram(s, bins=bins)
        plt.hist(bins[:-1], bins, weights=hist)
        plt.show()

    return vec