def main():
    """Download (unless skipped), extract, and post-process both packs.

    Side effects only: refreshes the `.tmp` scratch directory, fetches the
    resource/behavior pack zips, unpacks them, then runs the harvest and
    sound-stripping steps. Returns None.
    """
    if not SKIP_DOWNLOAD:
        # Start from a clean scratch directory so stale files never leak in.
        if os.path.exists('.tmp'):
            shutil.rmtree('.tmp')
        os.makedirs('.tmp', exist_ok=True)

        print("Downloading files...")
        for url, dest in ((RP_URL, '.tmp/rp.zip'), (BP_URL, '.tmp/bp.zip')):
            download_file(url, dest)

    # Re-extract both packs (even when the download step was skipped) so the
    # working trees always match the zips currently on disk.
    for label, short in (('resource', 'rp'), ('behavior', 'bp')):
        target = '.tmp/' + short
        if os.path.exists(target):
            shutil.rmtree(target)
        print(f'Extracting {label} pack')
        with ZipFile(target + '.zip') as zipf:
            zipf.extractall(target)

    harvest('.tmp/bp', DOWNLOAD_MODE == 'stable')
    strip_sounds('.tmp/rp/sounds/sound_definitions.json')
no_console_display = False # start progress bar for html output files if no_console_display: config_number = len(filenames[0]) config_checked = 0 bar = progressbar.ProgressBar(maxval=config_number, widgets=[ progressbar.Bar(left='[', marker='=', right=']'), # Прогресс progressbar.SimpleProgress(), ]).start() # Creating dictionary for drawing graph dict_for_drawing_plot = {} # creds harvester if (args.args.dump_creds): harvester.harvest(filenames[0]) # processing configs one by one for filename in filenames[0]: # Define config file name config_name = filename.partition(config_directory)[2] # Create output html file full path if needed if html_directory: html_file = html_directory + config_name + '.html' # parsing configs parsing.parseconfigs(filename, check_disabled) # getting parse output
def rank(
    year,
    alpha=0.85,
    iters=3500,
    print_rankings=False,
    plot_rankings=False,
    serialize_results=True,
    first_year=True,
):
    """
    Ranks all Division I NCAA Basketball teams in a given year using PageRank.

    PARAMETERS
    year --> The year that the NCAA championship game takes place.
             The 2019-2020 season would correspond to year=2020.
    alpha --> A value between 0 and 1 that is a measure of randomness in
              PageRank meant to model the possibility that a user randomly
              navigates to a page without using a link. alpha=1 would be
              completely deterministic, while alpha=0 would be completely
              random. Google is rumored to use alpha=.85.
    iters --> The number of iterations of PageRank matrix multiplication to
              perform. The default of iters=3500, about 10x the number of
              Division I teams, is generally sufficient for ranking.
    print_rankings --> Should the function print out its rankings to stdout?
                       Defaults to False.
    plot_rankings --> Should the function plot the rankings in sorted order?
                      Defaults to False.
    serialize_results --> Should the function save its rankings and raw
                          PageRank vector to "./predictions/[YEAR]_rankings.p"
                          and "./predictions/[YEAR]_vector.p"? Defaults to True.
    first_year --> If True, start PageRank from a uniform vector; if False,
                   seed it with the previous year's vector (computed
                   recursively with a slightly lower alpha). Defaults to True.

    RETURNS
    The final PageRank column vector (numpy array of shape (num_teams, 1)),
    or None if no summary exists and one could not be created.
    """
    # Try to deserialize the .p summary file; try to create it if it doesn't exist.
    try:
        with open("./summaries/" + str(year) + "/total_summary.p", "rb") as f:
            total_summary = pickle.load(f)
    except FileNotFoundError:
        print("--- WARNING: No summary found for", year, "Trying to create summary...")
        try:
            harvester.harvest(year)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate.
            print("--- ERROR: Could not make summary for", year)
            return
        print("--- SUCCESS: Summary created for", year)
        print("--- Trying to rank again with newly created summary")
        # Propagate the retry's result (was previously discarded) and forward
        # first_year so the caller's intent survives the retry.
        return rank(
            year, alpha, iters, print_rankings, plot_rankings,
            serialize_results, first_year,
        )

    # Ordered list of unique team names, with spaces normalized to hyphens.
    # NOTE(review): the membership/index checks below use the raw (unjoined)
    # names from the summary — this only matches if names contain no spaces;
    # confirm against the harvester's output format.
    teams = list(
        {"-".join(game[GameValues.HOME_TEAM.value].split(" ")) for game in total_summary}
    )
    num_teams = len(teams)

    # Initial PageRank vector: uniform for the first year, otherwise seeded
    # with the previous year's converged vector.
    if first_year:
        vec = np.ones((num_teams, 1))
    else:
        vec = rank(year - 1, alpha - 0.1, serialize_results=False, first_year=True)
        # NOTE(review): if last year's vector is shorter than num_teams the
        # matrix product below will fail; confirm team sets are stable.
        num_teams = max(num_teams, len(vec))

    mat = np.zeros((num_teams, num_teams))
    team_idx = {team: i for i, team in enumerate(teams)}  # O(1) index lookups

    for game in total_summary:
        # We only want to count games where both teams are D1 (in teams list).
        # We choose to only look at games where the first team won so we
        # don't double-count games.
        if (
            game[GameValues.HOME_TEAM.value] in team_idx
            and game[GameValues.AWAY_TEAM.value] in team_idx
            and game[GameValues.WIN_LOSS.value]
        ):
            # Game winner: since we know the first/home team won, we can
            # already assign the win weight for that.
            home_pr_score, away_pr_score = GameWeights.WEIGHTS.value[0], 0.0

            # Effective field goal percentage (higher is better)
            if game[GameValues.HOME_eFGp.value] > game[GameValues.AWAY_eFGp.value]:
                home_pr_score += GameWeights.WEIGHTS.value[1]
            elif game[GameValues.AWAY_eFGp.value] > game[GameValues.HOME_eFGp.value]:
                away_pr_score += GameWeights.WEIGHTS.value[1]

            # Turnover percentage (lower is better)
            if game[GameValues.HOME_TOVp.value] < game[GameValues.AWAY_TOVp.value]:
                home_pr_score += GameWeights.WEIGHTS.value[2]
            elif game[GameValues.AWAY_TOVp.value] < game[GameValues.HOME_TOVp.value]:
                away_pr_score += GameWeights.WEIGHTS.value[2]

            # Offensive rebound percentage (higher is better)
            if game[GameValues.HOME_ORBp.value] > game[GameValues.AWAY_ORBp.value]:
                home_pr_score += GameWeights.WEIGHTS.value[3]
            elif game[GameValues.AWAY_ORBp.value] > game[GameValues.HOME_ORBp.value]:
                away_pr_score += GameWeights.WEIGHTS.value[3]

            # Free throw rate (higher is better)
            if game[GameValues.HOME_FTR.value] > game[GameValues.AWAY_FTR.value]:
                home_pr_score += GameWeights.WEIGHTS.value[4]
            elif game[GameValues.AWAY_FTR.value] > game[GameValues.HOME_FTR.value]:
                away_pr_score += GameWeights.WEIGHTS.value[4]

            # Add weighted score for this game to the matrix for both teams.
            home_idx = team_idx[game[GameValues.HOME_TEAM.value]]
            away_idx = team_idx[game[GameValues.AWAY_TEAM.value]]
            mat[home_idx, away_idx] += home_pr_score
            mat[away_idx, home_idx] += away_pr_score

    # Alter the matrix to take into account our alpha factor
    # (blend in the uniform "random surfer" term).
    mat = (alpha * mat) + (1 - alpha) * np.ones((num_teams, num_teams)) / num_teams

    # Power iteration: repeatedly multiply, renormalizing so the weights
    # keep summing to num_teams.
    for _ in range(iters):
        vec = mat @ vec
        vec *= num_teams / vec.sum()

    # Sort the (ranking, team) pairs into an ascending list of tuples.
    sorted_pairs = sorted((prob[0], team) for team, prob in zip(teams, vec))

    # Print ranking pairs if specified (best team printed last, as rank 1).
    if print_rankings:
        for i, pair in enumerate(sorted_pairs):
            print(num_teams - i, pair)

    # Serialize results if specified
    # TODO: Serialize results as dict?
    if serialize_results:
        # Make the predictions folder if needed.
        outfile1 = f"./predictions/{year}_rankings.p"
        outfile2 = f"./predictions/{year}_vector.p"
        os.makedirs(os.path.dirname(outfile1), exist_ok=True)

        # Every team gets an entry (default 0), then actual scores overwrite.
        serial = {team: 0 for team in teams}
        for score, team in sorted_pairs:
            serial[team] = score

        with open(outfile1, "wb") as f:
            pickle.dump(serial, f)
        with open(outfile2, "wb") as f:
            pickle.dump(vec, f)

    # Plot a histogram of the ranking distribution if specified.
    if plot_rankings:
        s = sorted(vec)
        bins = np.arange(0.0, 3.5, 0.125)
        hist, bins = np.histogram(s, bins=bins)
        plt.hist(bins[:-1], bins, weights=hist)
        plt.show()

    return vec