def test_get_rankings_all():
    """Check that get_rankings_all reproduces the stored standings files.

    Standings are generated for every season from 1993 through 2019 into a
    temporary location, and each generated CSV is compared with the file of
    the same name under STANDINGS_PATH.
    """
    # NOTE(review): this path ends in 'file.csv' yet is used as a folder
    # below; presumably make_directory creates the parent of the path it is
    # given -- confirm against its definition before changing it.
    temp_folder = os.path.join(os.getcwd(), 'temp/file.csv')
    make_directory(temp_folder)

    from_year, to_year = 1993, 2019
    try:
        get_rankings_all(from_year, to_year, RAW_CLEANED_DATA_FILE_PATH,
                         temp_folder)

        for year in range(from_year, to_year + 1):
            csv_file = '{}-{}.csv'.format(year, year + 1)
            created_file = os.path.join(temp_folder, csv_file)
            cmp_file = os.path.join(STANDINGS_PATH, csv_file)
            assert compare_csv(cmp_file, created_file)
    finally:
        # Clean up even when generation or a comparison fails, so a failed
        # run does not leave stale output behind for the next one.
        remove_directory(temp_folder)
def test_get_rankings_year_after_sofifa():
    """Check get_rankings for the 2008-2009 season against stored standings.

    The ranking is produced from the cleaned raw CSV for that season with
    predictions excluded, written to a temporary folder, and compared with
    the file of the same name under STANDINGS_PATH.
    """
    year = 2008
    temp_folder = os.path.join(os.getcwd(), 'temp')
    csv_file = '{}-{}.csv'.format(year, year + 1)
    from_file = os.path.join(RAW_CLEANED_DATA_FILE_PATH, csv_file)
    to_file = os.path.join(temp_folder, csv_file)
    make_directory(temp_folder)

    try:
        # The third argument is the last day of the following calendar year.
        get_rankings(from_file, to_file, '{}-12-31'.format(year + 1),
                     include_prediction=False)

        cmp_file = os.path.join(STANDINGS_PATH, csv_file)
        assert compare_csv(cmp_file, to_file)
    finally:
        # Clean up even when the comparison fails.
        remove_directory(temp_folder)
def test_compare_csv():
    """Check that compare_csv reports a copied CSV as equal to its source.

    Also checks that a file compares equal to itself, for both the copy and
    the original FINAL_FILE.
    """
    temp_folder = os.path.join(os.getcwd(), 'temp')
    temp_file = os.path.join(temp_folder, 'temp.csv')

    # Start from a clean slate so the copy below is what creates the file.
    remove_directory(temp_folder)
    assert not os.path.isfile(temp_file)
    assert not os.path.isdir(temp_folder)

    try:
        copy_csv(FINAL_FILE, temp_file)

        assert compare_csv(FINAL_FILE, temp_file)
        assert compare_csv(temp_file, temp_file)
        assert compare_csv(FINAL_FILE, FINAL_FILE)
    finally:
        # Clean up even when the copy or a comparison fails.
        remove_directory(temp_folder)
Example #4
0
def delete_images_directory(reset_downloaded_images_list=True):
    """Remove the 'content/images' directory and optionally reset state.

    Nothing else happens unless remove_directory returns a truthy value.
    When it does, a confirmation line is printed and, if
    ``reset_downloaded_images_list`` is true, the loaded content object has
    its downloaded-images list reset and is saved back.
    """
    images_path = os.path.join('content', 'images')
    if not remove_directory(images_path, 'State'):
        return

    print('> [State Robot] Images directory successfully removed.')

    if not reset_downloaded_images_list:
        return

    state = load()
    state.reset_downloaded_images_list()
    save(state)
Example #5
0
def test_get_current_fixtures():
    """Check that get_current_fixtures writes a CSV with the expected columns.

    The output file must exist and its header must contain the Date,
    HomeTeam, AwayTeam, FTHG, FTAG and FTR columns.
    """
    temp_folder = os.path.join(os.getcwd(), 'temp')
    temp_file = os.path.join(temp_folder, 'temp.csv')
    # NOTE(review): make_directory receives the file path here, not the
    # folder; presumably it creates the parent directory -- confirm.
    make_directory(temp_file)

    try:
        get_current_fixtures(temp_file)

        assert os.path.isfile(temp_file)
        df = pd.read_csv(temp_file)
        df_columns_list = list(df)

        expected_columns = ('Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG',
                            'FTR')
        for column in expected_columns:
            assert column in df_columns_list, \
                'missing column: {}'.format(column)
    finally:
        # Clean up even when the download or an assertion fails.
        remove_directory(temp_folder)
Example #6
0
    def start_experiment(self,
                         num_epochs,
                         num_epoch_iterations,
                         test_phase_length,
                         resume_from_epoch=None,
                         show_final_plots=False):
        """Run the training epochs, then the testing phase, saving state.

        For each epoch (numbered from 1) the simulator is run with
        ``num_epoch_iterations * epoch_index`` and the state is saved under
        ``experiment_results/<experiment_name>/epoch_<index>``. From the
        second epoch on, the previous epoch's location is passed to
        ``save_state`` and then removed. After the epochs, the simulator is
        switched to its testing phase, run with
        ``num_epochs * num_epoch_iterations + test_phase_length``, and the
        state is saved under ``.../test``.

        Args:
            num_epochs: Number of epochs to iterate over.
            num_epoch_iterations: Multiplied by the epoch index to form the
                argument to ``run_simulation`` (presumably a cumulative
                iteration target -- confirm against the simulator).
            test_phase_length: Added to the final ``run_simulation``
                argument for the testing phase.
            resume_from_epoch: When not None, epochs with an index less than
                or equal to this value are skipped.
            show_final_plots: Forwarded to the final ``save_state`` call as
                ``show_generated_plots``.
        """
        state_save_folder = 'experiment_results/{}'.format(
            self.experiment_name)

        for epoch_index in range(1, num_epochs + 1):
            already_done = (resume_from_epoch is not None
                            and epoch_index <= resume_from_epoch)
            if already_done:
                continue

            iteration_target = num_epoch_iterations * epoch_index
            self.dynamics_simulator.run_simulation(iteration_target)
            current_location = state_save_folder + '/epoch_{}'.format(
                epoch_index)

            if epoch_index <= 1:
                # The first epoch has no earlier snapshot to build on.
                self.save_state(new_state_save_location=current_location)
                continue

            previous_location = state_save_folder + '/epoch_{}'.format(
                epoch_index - 1)
            self.save_state(new_state_save_location=current_location,
                            prev_state_save_location=previous_location)
            # Only the most recent epoch snapshot is kept on disk.
            remove_directory(location=previous_location)

        self.dynamics_simulator.set_testing_phase(True)
        final_target = num_epochs * num_epoch_iterations + test_phase_length
        self.dynamics_simulator.run_simulation(final_target)

        test_location = state_save_folder + '/test'
        last_epoch_location = state_save_folder + '/epoch_{}'.format(
            num_epochs)
        self.save_state(new_state_save_location=test_location,
                        prev_state_save_location=last_epoch_location,
                        show_generated_plots=show_final_plots)
Example #7
0
def delete_content_directory():
    """Remove the 'content' directory and print a confirmation on success.

    The confirmation is printed only when remove_directory returns a truthy
    value.
    """
    removed = remove_directory('content', 'State')
    if not removed:
        return
    print('> [State Robot] Content directory successfully removed.')
def magic(should_train=True,
          should_scrape=False,
          data_year_available_from=1993,
          data_year_collect_from=2006):
    """Run the full pipeline from data preparation to database export.

    The steps run strictly in the order written below. Each one calls a
    helper defined elsewhere, so the per-step descriptions are the original
    author's notes and are not verifiable from this function alone.

    Args:
        should_train: Forwarded to get_clf as ``recalculate``; also decides
            whether save_summary_to_database is called at the end.
        should_scrape: When true, scrape_team_ova_all is called first.
        data_year_available_from: Start year passed to get_rankings_all,
            clean_all, add_current_details_all, add_current_details and
            get_match_results_against.
        data_year_collect_from: Start year passed to scrape_team_ova_all,
            merge_ova_to_cleaned_all and combine_matches.

    Returns:
        None.
    """
    # Function(s) that don't have to be executed every time

    # 1. OVA data from sofifa_scraper (Warning: This takes a long time to run)
    #   SOFIFA updates their stats two or three times every month, but they don't change the data much
    # Only runs when should_scrape is true.
    if should_scrape:
        scrape_team_ova_all(OVA_FILE_PATH, data_year_collect_from,
                            CURRENT_YEAR)

    # Preprocessing

    # 1. Latest premier league results
    # This data can also be retrieved from http://www.football-data.co.uk/englandm.php
    # Runs on every call.
    get_current_fixtures(RAW_DATA_FILE_PATH_CURRENT)

    # 2. Standings (from data_year_available_from to the current year)
    # Runs on every call.
    # NOTE(review): this reads RAW_CLEANED_DATA_FILE_PATH before clean_all (below) is called in this run -- confirm the ordering is intended.
    get_rankings_all(data_year_available_from, CURRENT_YEAR,
                     RAW_CLEANED_DATA_FILE_PATH, STANDINGS_PATH)

    # Run the functions below to start generating necessary data

    # 1. From raw data, remove all data but the selected columns.
    # Produces: cleaned data csv located in RAW_CLEANED_DATA_FILE_PATH
    clean_all(RAW_DATA_FILE_PATH, RAW_CLEANED_DATA_FILE_PATH,
              data_year_available_from, CURRENT_YEAR)

    # 2. From 1, add Overall Rating columns
    # Produces: cleaned csv modified, located in RAW_CLEANED_DATA_FILE_PATH. Now all cleaned csv from data_year_collect_from onwards have an OVA column.
    merge_ova_to_cleaned_all(OVA_FILE_PATH, RAW_CLEANED_DATA_FILE_PATH,
                             data_year_collect_from, CURRENT_YEAR)

    # 3. From 2, copy cleaned raw data to cleaned data for prediction purposes
    # Produces: copy csv from RAW_CLEANED_DATA_FILE_PATH to CLEANED_DATA_FILE_PATH
    copy_csv(RAW_CLEANED_DATA_FILE_PATH, CLEANED_DATA_FILE_PATH)

    # 4. From 3, add current status columns (current points, current goals for/against/difference, matches played, losing/winning streaks, last 5 games)
    # Produces: cleaned csv modified, located in CLEANED_DATA_FILE_PATH. Now all cleaned csv from data_year_available_from onwards have additional columns
    add_current_details_all(CLEANED_DATA_FILE_PATH, CLEANED_DATA_FILE_PATH,
                            STANDINGS_PATH, data_year_available_from,
                            CURRENT_YEAR, data_year_available_from)

    # 5. From 4, merge all csv files from startYear to endYear together.
    # FOR NOW, I only collect data from 2006 because sofifa only provides ova data from 2006, and the model tends to perform better with this approach
    # Produces: new csv file on FINAL_FILE
    combine_matches(CLEANED_DATA_FILE_PATH, FINAL_FILE, data_year_collect_from,
                    CURRENT_YEAR)

    # 6. From 5, get all head-to-head results (match results against the other team over time)
    # Produces: edited final.csv file under DATA_PATH
    get_match_results_against(FINAL_FILE, CLEANED_DATA_FILE_PATH, DATA_PATH,
                              data_year_available_from, CURRENT_YEAR)

    # 7. Once all data is aggregated, we can now build a classifier that makes predictions.
    # If 'recalculate' is set True, it runs multiple classifiers on this data,
    # does some grid search on it if necessary, and finally generates 'model confidence.csv' that records the confidence score of each classifier.
    # If 'recalculate' is set False, and if clf_file exists, then it simply loads the clf from clf_file.
    # Produces: returns the best clf.
    best_clf, _, best_clf_average = get_clf(FINAL_FILE,
                                            CONFIDENCE_FILE,
                                            CLF_FILE,
                                            recalculate=should_train)

    # 8. Now we make predictions. This is done by first predicting the upcoming round, then aggregating the result, then predicting the next,
    # and repeating the process until there are no more games to predict. "predict_next_round" also produces prediction probabilities
    # for each match on stat_path.
    #  - 1. predict_next_round predicts the next round and saves the result in RAW_CLEANED_DATA_FILE_PATH_CURRENT.
    #  - 2. add_current_details, as its name suggests, adds current details.
    #  - 3. combine_matches combines all matches from data_year_collect_from to the current year
    #  - 4. get_match_results_against adds head-to-head results between two teams for each match
    # Passed to predict_next_round as 'first'; true only for the first loop iteration.
    is_first = True

    # First save current ranking before predicting results
    remove_directory(STATISTICS_PATH)
    now = datetime.datetime.now().date().strftime('%Y-%m-%d')
    pred_ranking_round_file = os.path.join(
        PRED_RANKING_ROUND_PATH, 'prediction_ranking_{}.csv'.format(now))
    get_rankings(RAW_CLEANED_DATA_FILE_PATH_CURRENT,
                 pred_ranking_round_file,
                 include_prediction=True,
                 predicted_date_so_far=now,
                 ranking_summary_file=PRED_RANKING_ROUND_SUMMARY_FILE)

    # Keep predicting round by round until predict_next_round reports that nothing is left.
    while True:
        is_next_round, date = predict_next_round(
            best_clf,
            FINAL_FILE,
            RAW_CLEANED_DATA_FILE_PATH_CURRENT,
            statistics=True,
            stat_path=PREDICTION_FILE,
            first=is_first)
        if not is_next_round:
            break
        # Rebuild the aggregated data so the next round's prediction sees this round's results.
        add_current_details(RAW_CLEANED_DATA_FILE_PATH_CURRENT,
                            CLEANED_DATA_FILE_PATH_CURRENT, STANDINGS_PATH,
                            data_year_available_from)
        combine_matches(CLEANED_DATA_FILE_PATH, FINAL_FILE,
                        data_year_collect_from, CURRENT_YEAR)
        get_match_results_against(FINAL_FILE, CLEANED_DATA_FILE_PATH,
                                  DATA_PATH, data_year_available_from,
                                  CURRENT_YEAR)
        # Write a per-round ranking file named after the date returned for this round.
        pred_ranking_round_file = os.path.join(
            PRED_RANKING_ROUND_PATH, 'prediction_ranking_{}.csv'.format(date))
        get_rankings(PREDICTION_FILE,
                     pred_ranking_round_file,
                     include_prediction=True,
                     predicted_date_so_far=date,
                     ranking_summary_file=PRED_RANKING_ROUND_SUMMARY_FILE)
        is_first = False

    # 9. Now prediction is done. Produce a season standing using the prediction result.
    winning_team = get_rankings(PREDICTION_FILE,
                                PRED_RANKING_FILE,
                                include_prediction=True)

    # 10. Put previous results, prediction results, standing predictions into the database
    save_new_data_to_database(DATABASE_PATH, FINAL_FILE, PREDICTION_FILE,
                              PRED_RANKING_ROUND_SUMMARY_FILE)

    # 11. Summary to database (only when the classifiers were recalculated)
    if should_train:
        save_summary_to_database(DATABASE_PATH, best_clf_average, winning_team)