def run():
    """Train the mixture-density Q-value network on the ice-hockey games.

    Loads the MDN configuration, splits the game directories found under the
    data store into a training list and a small testing list, records the
    split next to the saved network (an existing record is first renamed to a
    dated ``bak_*`` file) and then calls ``run_network``.

    The TensorFlow session is closed even when training raises.
    """
    local_test_flag = False
    icehockey_mdn_Qs_config_path = "../environment_settings/ice_hockey_predict_Qs_mdn.yaml"
    icehockey_mdn_Qs_config = MDNQsCongfig.load(icehockey_mdn_Qs_config_path)
    saved_network_dir, log_dir = get_model_and_log_name(config=icehockey_mdn_Qs_config, model_catagoery='mdn_Qs')

    if local_test_flag:
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        save_flag = False
    else:
        data_store_dir = icehockey_mdn_Qs_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        # Floor division keeps the slice bound an int on Python 3 as well;
        # on Python 2 it is identical to the former `/`.
        split_index = len(dir_games_all) // 10 * 9
        training_dir_games_all = dir_games_all[:split_index]
        testing_dir_games_all = dir_games_all[-10:]  # TODO: testing
        save_flag = True
    icehockey_mdn_Qs_config.Learn.number_of_total_game = len(dir_games_all)

    sess = tf.Session()
    try:
        model = MixtureDensityNN(config=icehockey_mdn_Qs_config)
        model()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            date_tag = datetime.date.today().strftime("%Y%B%d")
            # Record the list that is really trained on (it used to record
            # only the first 8/10 while training used 9/10) together with the
            # full held-out tail; the run below only tests on the last 10.
            recorded_splits = (
                ('training', training_dir_games_all),
                ('testing', dir_games_all[split_index:]),
            )
            for split_name, split_games in recorded_splits:
                record_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(split_name)
                if os.path.exists(record_path):
                    # Keep the previous record as a dated backup.
                    os.rename(record_path,
                              saved_network_dir + '/bak_{0}_file_dirs_all_{1}.csv'.format(split_name, date_tag))
                # Text mode: the entries are str, which a binary handle
                # rejects on Python 3.
                with open(record_path, 'w') as f:
                    f.writelines(game_dir + '\n' for game_dir in split_games)

        run_network(sess=sess, model=model, config=icehockey_mdn_Qs_config, log_dir=log_dir,
                    save_network_dir=saved_network_dir, data_store=data_store_dir,
                    training_dir_games_all=training_dir_games_all, testing_dir_games_all=testing_dir_games_all,
                    player_id_cluster_dir=None, save_flag=save_flag)
    finally:
        sess.close()
def combine_de_embeddings():
    """Join the stored 'state', 'action' and 'reward' embedding matrices.

    For each target the embedding CSV of game 901 is read from the directory
    derived from that target's saved-network directory, and the matrices are
    concatenated column-wise (axis=1) in that order.

    Returns:
        The combined array; its entries are the raw CSV strings.
    """
    merged = None
    for target_name in ['state', 'action', 'reward']:
        config_file = "../environment_settings/ice_hockey_{0}_de.yaml".format(target_name)
        target_config = DEEmbedCongfig.load(config_file)
        network_dir, _ = get_model_and_log_name(config=target_config,
                                                model_catagoery='de_embed',
                                                train_flag=False)
        # The embeddings live in a sibling directory tree of the saved network.
        embed_dir = network_dir.replace('de_embed_saved_networks', 'store_embedding')
        embed_dir = embed_dir.replace('de_model_saved_NN', 'de_model_save_embedding')

        with open(embed_dir + '/embedding_matrix_game{0}.csv'.format(str(901)), 'r') as f:
            rows = [row for row in csv.reader(f)]

        if merged is not None:
            merged = np.concatenate([merged, rows], axis=1)
        else:
            merged = np.asarray(rows)

    return merged
            [model.mu_out, model.var_out],
            feed_dict={
                model.rnn_input_ph: input_data,
                model.trace_lengths_ph: state_trace_length
            })
        mu_all.append(mu)
        var_all.append(var)
    return mu_all, var_all


# Script entry point: loads the MDN Qs configuration and builds the lists of
# game directories used for training and testing.
if __name__ == '__main__':
    test_flag = True
    ci_value = 1.96
    icehockey_mdn_Qs_config_path = "../environment_settings/ice_hockey_predict_Qs_mdn.yaml"
    icehockey_mdn_Qs_config = MDNQsCongfig.load(icehockey_mdn_Qs_config_path)
    saved_network_dir, log_dir = get_model_and_log_name(
        config=icehockey_mdn_Qs_config, model_catagoery='mdn_Qs')

    if test_flag:
        # Local sample data: every list holds the full directory listing.
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        save_flag = False
    else:
        data_store_dir = icehockey_mdn_Qs_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        # NOTE(review): `/` here relies on Python 2 integer division; under
        # Python 3 it yields a float, which is not a valid slice bound.
        training_dir_games_all = dir_games_all[0:len(dir_games_all) / 10 * 9]
        # testing_dir_games_all = dir_games_all[len(dir_games_all)/10*9:]
        testing_dir_games_all = dir_games_all[-10:]  # TODO: testing
        save_flag = True
    number_of_total_game = len(dir_games_all)
def run():
    """Train the LSTM Q-value model for one cross-validation fold.

    ``running_number`` selects the fold. The game directories are split into
    a training list and a remainder; the first half of the remainder is the
    testing list and the second half the validation list. The three lists are
    recorded next to the saved network (existing records are first renamed to
    dated ``bak_*`` files), then ``run_network`` is called with the training
    list and only the last ten testing games.

    The TensorFlow session is closed even when training raises.
    """
    running_number = 2
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    local_test_flag = False
    player_info = '_pid'  # formerly named `type`, which shadowed the builtin
    icehockey_mdn_Qs_config_path = "../environment_settings/ice_hockey_predict_Qs_lstm{0}.yaml".format(player_info)
    icehockey_mdn_Qs_config = LSTMQsCongfig.load(icehockey_mdn_Qs_config_path)
    saved_network_dir, log_dir = get_model_and_log_name(config=icehockey_mdn_Qs_config,
                                                        model_catagoery='lstm_Qs',
                                                        running_number=running_number)

    if local_test_flag:
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        validate_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        tmp_testing_dir_games_all = os.listdir(data_store_dir)
        save_flag = False
    else:
        data_store_dir = icehockey_mdn_Qs_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        save_flag = True
        # Floor division (`//`) equals the former Python 2 `/` on ints and
        # keeps the slice bounds valid on Python 3.
        game_count = len(dir_games_all)
        training_end = game_count // 5 * 4 - running_number * game_count // 5
        training_dir_games_all = dir_games_all[0:training_end]
        if running_number != 0:
            # A tail slice starting at -0 would select the whole list, so
            # fold 0 gets no tail.
            training_dir_games_all = training_dir_games_all \
                                     + dir_games_all[-running_number * game_count // 5:]
        training_lookup = set(training_dir_games_all)  # O(1) membership tests
        test_validate_dir_games_all = [item for item in dir_games_all if item not in training_lookup]
        half = len(test_validate_dir_games_all) // 2
        testing_dir_games_all = test_validate_dir_games_all[:half]
        validate_dir_games_all = test_validate_dir_games_all[half:]
        tmp_testing_dir_games_all = testing_dir_games_all[-10:]  # TODO: it is a small running testing, not the real one
    icehockey_mdn_Qs_config.Learn.number_of_total_game = len(dir_games_all)

    sess = tf.Session()
    try:
        model = TD_Prediction(config=icehockey_mdn_Qs_config)
        model()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            date_tag = datetime.date.today().strftime("%Y%B%d")
            recorded_splits = (
                ('training', training_dir_games_all),
                ('validate', validate_dir_games_all),
                ('testing', testing_dir_games_all),
            )
            for split_name, split_games in recorded_splits:
                record_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(split_name)
                if os.path.exists(record_path):
                    # Keep the previous record as a dated backup.
                    os.rename(record_path,
                              saved_network_dir + '/bak_{0}_file_dirs_all_{1}.csv'.format(split_name, date_tag))
                # Text mode: the entries are str, which a binary handle
                # rejects on Python 3.
                with open(record_path, 'w') as f:
                    f.writelines(game_dir + '\n' for game_dir in split_games)

        run_network(sess=sess, model=model, config=icehockey_mdn_Qs_config, log_dir=log_dir,
                    save_network_dir=saved_network_dir, data_store=data_store_dir,
                    training_dir_games_all=training_dir_games_all, testing_dir_games_all=tmp_testing_dir_games_all,
                    player_id_cluster_dir=None, save_flag=save_flag)
    finally:
        sess.close()
# Example #5
# 0
def validate_score_diff(model_data_store_dir,
                        data_name,
                        source_data_dir,
                        data_store,
                        model_category,
                        file_writer=None,
                        cv_number=None):
    """Measure how far predicted final score differences are from the truth.

    For every cross-validation run in ``range(0, cv_number)`` the first 20
    games listed in that run's ``testing_file_dirs_all.csv`` are loaded. For
    each event of a game the absolute difference between the predicted and
    the real final score differential is collected, then aggregated both per
    event index and per game-time second (0..3600).

    Args:
        model_data_store_dir: passed to ``obtain_model_predictions``.
        data_name: name of the stored predictions; when ``None`` no model is
            read and the current score differential is used as prediction.
        source_data_dir: passed as ``data_store=`` to
            ``read_feature_within_events``.
        data_store: passed as the second positional argument to
            ``read_feature_within_events``.
        model_category: forwarded to ``get_model_and_log_name`` and used in
            the final summary print.
        file_writer: optional object with ``write``; receives one line per
            included event index.
        cv_number: number of cross-validation runs.
            NOTE(review): the default ``None`` makes ``range(0, cv_number)``
            raise, so callers have to pass an int.

    Returns:
        A 6-tuple: mean diff per event, variance per event, the event
        indices, mean diff per game second, variance per game second and the
        list of game seconds 0..3600.

    NOTE(review): ``icehockey_model_config`` is not a parameter; it is read
    from an enclosing/global scope that is not visible here.
    """
    # Width of the per-game record rows; unused slots keep the -100 sentinel.
    length_max = 5000
    length_min = 5000

    real_label_record_all = None
    output_label_record_all = None
    game_time_record_all = None

    for running_number in range(0, cv_number):
        saved_network_dir, log_dir = get_model_and_log_name(
            config=icehockey_model_config,
            model_catagoery=model_category,
            running_number=running_number)
        testing_dir_games_all = []
        # with open('../../sport_resource/ice_hockey_201819/' + '/testing_file_dirs_all_v2.csv', 'rb') as f:
        with open(saved_network_dir + '/testing_file_dirs_all.csv', 'rb') as f:
            testing_dir_all = f.readlines()
        # int() drops the trailing newline of each recorded entry.
        for testing_dir in testing_dir_all:
            testing_dir_games_all.append(str(int(testing_dir)))

        # Only the first 20 testing games of each run are evaluated.
        testing_dir_games_all = testing_dir_games_all[:20]
        # One row per game, pre-filled with the -100 "no value" sentinel.
        real_label_record = np.ones([len(testing_dir_games_all), length_max
                                     ]) * -100
        output_label_record = np.ones([len(testing_dir_games_all), length_max
                                       ]) * -100
        game_time_record = np.ones([len(testing_dir_games_all), length_max
                                    ]) * -100

        for dir_index in range(0, len(testing_dir_games_all)):
            print('Processing game {0}'.format(dir_index))
            testing_dir = testing_dir_games_all[dir_index]
            if data_name is not None:
                model_values = obtain_model_predictions(
                    model_data_store_dir, testing_dir, data_name,
                    running_number)

            score_difference_game = read_feature_within_events(
                testing_dir,
                data_store,
                'scoreDifferential',
                transfer_home_number=True,
                data_store=source_data_dir,
                allow_overtime=False)
            game_time_list = read_feature_within_events(
                testing_dir,
                data_store,
                'gameTime',
                transfer_home_number=False,
                data_store=source_data_dir,
                allow_overtime=False)

            if data_name is None:
                # Baseline without a model: the prediction is the current
                # score differential and the times are spread evenly over
                # 3600 seconds, replacing the gameTime values read above.
                output_label_all = np.asarray(
                    len(score_difference_game) * [0]) + score_difference_game
                real_label_all = [score_difference_game[-1]
                                  ] * len(score_difference_game)
                game_time_list = []
                for j in range(
                        0, len(score_difference_game)
                ):  # TODO: how to map to the time under cross-validation?
                    game_time_list.append(
                        float(3600) / len(score_difference_game) * j)
            else:
                # The real label is the last score differential of the game,
                # repeated for every event.
                real_label_all = [score_difference_game[-1]
                                  ] * len(score_difference_game)
                # Prediction: column 0 minus column 1 of the model output
                # plus the current score differential.
                # presumably columns 0/1 are the two teams' values - TODO confirm
                output_label_all = model_values[:len(score_difference_game), 0] - \
                                   model_values[:len(score_difference_game), 1] + score_difference_game[
                                                                                  :len(score_difference_game)]

            real_label_record[dir_index][:len(real_label_all)] = real_label_all
            output_label_record[dir_index][:len(output_label_all
                                                )] = output_label_all
            game_time_record[dir_index][:len(game_time_list)] = game_time_list
        # Stack the records of this run below those of the earlier runs.
        if real_label_record_all is None:
            real_label_record_all = real_label_record
            output_label_record_all = output_label_record
            game_time_record_all = game_time_record
        else:
            real_label_record_all = np.concatenate(
                [real_label_record_all, real_label_record], axis=0)
            output_label_record_all = np.concatenate(
                [output_label_record_all, output_label_record], axis=0)
            game_time_record_all = np.concatenate(
                [game_time_record_all, game_time_record], axis=0)

    acc_diff_mean_by_event = []
    acc_diff_var_by_event = []
    acc_global = []
    game_time_diff_record_list = []
    game_time_list = []

    include_number = 0
    # One bucket of diffs per game second 0..3600.
    for i in range(0, 3601):
        game_time_diff_record_list.append([])
        game_time_list.append(i)
    for i in range(0, length_max):
        # Column i holds the i-th event of every recorded game.
        real_outcome_record_step = real_label_record_all[:, i]
        model_output_record_step = output_label_record_all[:, i]
        game_time_record_step = game_time_record_all[:, i]
        diff_list = []
        total_number = 0
        print_flag = True
        check_flag = False
        include_flag = False
        for win_index in range(0, len(real_outcome_record_step)):
            # Skip games that have no event at this index (sentinel values).
            if model_output_record_step[win_index] == -100 or \
                            real_outcome_record_step[win_index] == -100 or \
                            game_time_record_step[win_index] == -100:
                check_flag = True
                # include_flag = False
                continue
            else:
                include_flag = True

            diff = abs(model_output_record_step[win_index] -
                       real_outcome_record_step[win_index])
            game_time_index = int(game_time_record_step[win_index])
            game_time_diff_record_list[game_time_index].append(diff)
            diff_list.append(diff)
            acc_global.append(diff)
            total_number += 1
        # When at least one game was missing at this index, the index is
        # only kept if some collected diff is below 0.2.
        if check_flag:
            diff_list_new = []
            for diff in diff_list:
                if diff < 0.2:
                    diff_list_new.append(diff)
            if len(diff_list_new) == 0:
                include_flag = False

        if include_flag:
            acc_diff_mean_by_event.append(np.mean(np.asarray(diff_list)))
            acc_diff_var_by_event.append(np.var(np.asarray(diff_list)))
            if file_writer is not None:
                file_writer.write('diff of event {0} is {1}\n'.format(
                    str(include_number),
                    str(acc_diff_mean_by_event[include_number])))

            if print_flag:
                if include_number % 100 == 0:
                    print('diff of event {0} is {1}'.format(
                        str(include_number),
                        str(acc_diff_mean_by_event[include_number])))
            include_number += 1
        else:
            continue
            # event_numbers.append(i)

    acc_diff_mean_by_time = []
    acc_diff_var_by_time = []
    # Aggregate the diffs bucketed by game second; a second without any
    # event contributes the mean/variance of an empty array.
    for i in range(0, 3601):
        game_time_diff_list = game_time_diff_record_list[i]
        acc_diff_mean_by_time.append(np.mean(np.asarray(game_time_diff_list)))
        acc_diff_var_by_time.append(np.var(np.asarray(game_time_diff_list)))

        if i % 100 == 0:
            print('diff of time {0} is {1}'.format(
                str(i), str(acc_diff_mean_by_time[i])))

    print('diff of {0} has the mean {1} and variance {2}.'.format(
        model_category, str(np.mean(np.asarray(acc_global))),
        str(np.var(np.asarray(acc_global)))))
    return np.asarray(acc_diff_mean_by_event), np.asarray(acc_diff_var_by_event), \
           range(len(acc_diff_mean_by_event)), \
           np.asarray(acc_diff_mean_by_time), np.asarray(acc_diff_var_by_time), game_time_list
def run():
    """Train the LSTM prediction model for one cross-validation fold.

    ``prediction_type`` selects the predicted target and, where needed, the
    player-id cluster file. ``running_number`` selects the fold. The game
    directories are split into training, testing and validation lists, the
    lists are recorded next to the saved network (existing records are first
    renamed to dated ``bak_*`` files) and ``run_network`` is called with the
    training list and only the last ten testing games.

    Raises:
        ValueError: if ``prediction_type`` is not one of the known types.

    The TensorFlow session is closed even when training raises.
    """
    play_info = '_pid'
    running_number = 2
    prediction_type = 'action_goal'  # formerly named `type`, which shadowed the builtin
    # prediction type -> (player id cluster file or None, predicted target)
    target_by_type = {
        'ap_playerId': ('../sport_resource/ice_hockey_201819/player_id_ap_cluster.json',
                        'PlayerPositionClusterAP'),
        'km_playerId': ('../sport_resource/ice_hockey_201819/player_id_km_cluster.json',
                        'PlayerPositionClusterKM'),
        'pos_playerId': (None, 'playerposition'),
        'pids': ('../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json',
                 'PlayerLocalId'),
        'action_goal': (None, 'ActionGoal'),
        'action': (None, 'Action'),
    }
    if prediction_type not in target_by_type:
        raise ValueError('unknown type')
    player_id_cluster_dir, predicted_target = target_by_type[prediction_type]

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    BalancedMemoryBuffer.set_cache_memory(cache_number=2)

    tt_lstm_config_path = "../environment_settings/ice_hockey_{0}_prediction{1}.yaml".format(
        predicted_target, play_info)
    lstm_prediction_config = LSTMPredictConfig.load(tt_lstm_config_path)

    local_test_flag = False
    saved_network_dir, log_dir = get_model_and_log_name(
        config=lstm_prediction_config,
        model_catagoery='lstm_prediction',
        running_number=running_number)
    if local_test_flag:
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        source_data_store_dir = '/Users/liu/Desktop/Ice-hokcey-data-sample/data-sample/'
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        validate_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        # Was missing: run_network() below reads this name in both modes,
        # so a local test run failed with a NameError.
        tmp_testing_dir_games_all = os.listdir(data_store_dir)
    else:
        data_store_dir = lstm_prediction_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        source_data_store_dir = lstm_prediction_config.Learn.save_mother_dir + '/oschulte/Galen/2018-2019/'
        dir_games_all = os.listdir(data_store_dir)
        # Floor division (`//`) equals the former Python 2 `/` on ints and
        # keeps the slice bounds valid on Python 3.
        game_count = len(dir_games_all)
        training_end = game_count // 5 * 4 - running_number * game_count // 5
        training_dir_games_all = dir_games_all[0:training_end]
        if running_number != 0:
            # A tail slice starting at -0 would select the whole list, so
            # fold 0 gets no tail.
            training_dir_games_all = training_dir_games_all \
                                     + dir_games_all[-running_number * game_count // 5:]
        training_lookup = set(training_dir_games_all)  # O(1) membership tests
        test_validate_dir_games_all = [
            item for item in dir_games_all
            if item not in training_lookup
        ]
        half = len(test_validate_dir_games_all) // 2
        testing_dir_games_all = test_validate_dir_games_all[:half]
        validate_dir_games_all = test_validate_dir_games_all[half:]
        tmp_testing_dir_games_all = testing_dir_games_all[
            -10:]  # TODO: it is a small running testing, not the real one
    lstm_prediction_config.Learn.number_of_total_game = len(dir_games_all)

    sess = tf.Session()
    try:
        model = Td_Prediction_NN(config=lstm_prediction_config)
        model.initialize_ph()
        model.build()
        model.call()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            date_tag = datetime.date.today().strftime("%Y%B%d")
            recorded_splits = (
                ('training', training_dir_games_all),
                ('validate', validate_dir_games_all),
                ('testing', testing_dir_games_all),
            )
            for split_name, split_games in recorded_splits:
                record_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(split_name)
                if os.path.exists(record_path):
                    # Keep the previous record as a dated backup.
                    os.rename(
                        record_path,
                        saved_network_dir +
                        '/bak_{0}_file_dirs_all_{1}.csv'.format(split_name, date_tag))
                # Text mode: the entries are str, which a binary handle
                # rejects on Python 3.
                with open(record_path, 'w') as f:
                    f.writelines(game_dir + '\n' for game_dir in split_games)

        run_network(sess=sess,
                    model=model,
                    config=lstm_prediction_config,
                    training_dir_games_all=training_dir_games_all,
                    testing_dir_games_all=tmp_testing_dir_games_all,
                    model_data_store_dir=data_store_dir,
                    player_id_cluster_dir=player_id_cluster_dir,
                    source_data_store_dir=source_data_store_dir,
                    save_network_dir=saved_network_dir)
    finally:
        sess.close()
def run_calibration():
    """Run the next-goal calibration of one model type over all CV folds.

    Loads the configuration matching ``model_type``, builds a ``Calibration``
    object over the listed features, feeds it the recorded testing games of
    every fold in ``running_numbers``, computes the calibration distance and
    prints the directory the calibration was saved to.

    Raises:
        ValueError: if ``model_type`` is not one of the supported types.
    """
    model_type = 'lstm_Qs'
    player_info = '_pid'
    apply_old = False
    apply_difference = False
    running_numbers = [0, 1, 2, 3, 4]
    if model_type == 'cvrnn':
        model_number = 601
        embed_mode = '_embed_random'
        predicted_target = '_PlayerLocalId_predict_nex_goal'
        icehockey_config_path = "../../environment_settings/" \
                                "icehockey_cvrnn{0}_config{1}{2}.yaml" \
            .format(predicted_target, player_info, embed_mode)
        config = CVRNNCongfig.load(icehockey_config_path)
    elif model_type == 'cvae':
        model_number = 601
        embed_mode = ''
        predicted_target = '_PlayerLocalId_predict_next_goal'  # playerId_
        icehockey_config_path = "../../environment_settings/icehockey_cvae_lstm{0}_config{1}.yaml".format(
            predicted_target, player_info)
        config = CVAECongfig.load(icehockey_config_path)
    elif model_type == 'vhe':
        model_number = 601
        embed_mode = ''
        predicted_target = '_PlayerLocalId_predict_next_goal'  # playerId_
        icehockey_config_path = "../../environment_settings/icehockey_vhe_lstm{0}_config{1}.yaml".format(
            predicted_target, player_info)
        config = CVAECongfig.load(icehockey_config_path)
    elif model_type == 'encoder':
        model_number = 901
        embed_mode = ''
        predicted_target = '_PlayerLocalId_predict_next_goal'
        # player_id_cluster_dir = '../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json'
        icehockey_encoder_config_path = "../../environment_settings/" \
                                        "icehockey_stats_lstm_encoder{0}" \
                                        "_config{1}.yaml".format(predicted_target, player_info)
        config = EncoderConfig.load(icehockey_encoder_config_path)
    elif model_type == 'lstm_Qs':
        embed_mode = ''
        # 'model_2101_three_cut_lstm_next_Qs_featurev1_next_Qs_batch32_iterate10_lr1e-05_v1_MaxTL10_LSTM512_dense256'
        model_number = 901
        icehockey_config_path = "../../environment_settings/ice_hockey_predict_Qs_lstm{0}.yaml".format(
            player_info)
        config = LSTMQsCongfig.load(icehockey_config_path)
    else:
        raise ValueError('incorrect model type {0}'.format(model_type))
    calibration_features = ['period', 'scoreDifferential', 'zone', 'manpowerSituation', 'home_away']
    # NOTE(review): ('period') etc. are plain strings, not 1-tuples; kept
    # as-is because the expected type is defined by Calibration.
    calibration_bins = {'period': {'feature_name': ('period'), 'range': (1, 2, 3, 4)},
                        'scoreDifferential': {'feature_name': ('scoreDifferential'), 'range': range(-10, 10)},
                        'zone': {'feature_name': ('zone'), 'range': ('dz', 'nz', 'oz')},
                        'manpowerSituation': {'feature_name': ('manpowerSituation'),
                                              'range': ('shortHanded', 'evenStrength', 'powerPlay')},
                        'home_away': {'feature_name': ('home_away'), 'range': (1, 0)},
                        # TODO: we must add the home/away label
                        }
    source_data_dir = '/Local-Scratch/oschulte/Galen/2018-2019/'
    model_data_store_dir = '/Local-Scratch/oschulte/Galen/Ice-hockey-data/2018-2019'

    Cali = Calibration(bins=calibration_bins, source_data_dir=source_data_dir,
                       calibration_features=calibration_features, config=config,
                       model_data_store_dir=model_data_store_dir, apply_old=apply_old,
                       apply_difference=apply_difference,
                       model_type=model_type, model_number=model_number,
                       player_info=player_info, calibration_type='next_goal',
                       # testing_dir_all=testing_dir_games_all[:1],
                       embed_mode=embed_mode,
                       focus_actions_list=[],
                       if_apply_cv=True,
                       running_numbers=running_numbers)
    Cali.construct_bin_dicts()
    for running_number in running_numbers:
        saved_network_dir, _ = get_model_and_log_name(config=config,
                                                      model_catagoery=model_type,
                                                      running_number=running_number)
        with open(saved_network_dir + '/testing_file_dirs_all.csv', 'rb') as f:
            # with open('../../sport_resource/ice_hockey_201819/' + '/testing_file_dirs_all.csv', 'rb') as f:
            testing_dir_all = f.readlines()
        # int() drops the trailing newline of each entry; blank lines are
        # skipped so a stray empty line cannot abort the whole run.
        testing_dir_games_all = [
            str(int(testing_dir)) + '-playsequence-wpoi.json'
            for testing_dir in testing_dir_all
            if testing_dir.strip()
        ]
        Cali.aggregate_calibration_values(testing_dir_games_all=testing_dir_games_all, running_number=running_number)
    Cali.compute_distance()
    # Call form prints the same text on Python 2 and is valid on Python 3.
    print(Cali.save_calibration_dir)
# Example #8
# 0
def run():
    """Train the CVRNN model for one cross-validation fold.

    ``running_number`` selects the fold (0-4 for 5-fold cross validation).
    The game directories are split into training, testing and validation
    lists, the lists are recorded next to the saved network (existing records
    are first renamed to dated ``bak_*`` files) and ``run_network`` is called
    with the training list and only the last ten testing games, with a
    training record file opened for it.

    The TensorFlow session is closed even when training raises.
    """
    local_test_flag = False
    box_msg = ''
    predict_action = '_predict_nex_goal'
    embed_mode = '_embed_random'
    running_number = 0  # running_number is [0, 1, 2, 3, 4] for 5-fold cross validation
    extra_prediction_flag = len(predict_action) > 0

    player_id_cluster_dir = '../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json'
    predicted_target = '_PlayerLocalId'  # playerId_

    icehockey_cvrnn_config_path = "../environment_settings/" \
                                  "icehockey_cvrnn{0}{2}_config{1}{3}.yaml".format(predicted_target,
                                                                                   box_msg,
                                                                                   predict_action,
                                                                                   embed_mode)
    # datetime.now() instead of date.today(): a date has no hour, so the
    # '%H' field of the record name was always '00'.
    name_training_record_file = "record_training_cvrnn{0}{2}_config{1}{3}{4}.yaml".format(
        predicted_target, box_msg, predict_action, embed_mode,
        datetime.datetime.now().strftime('%Y-%m-%d-%H'))
    icehockey_cvrnn_config = CVRNNCongfig.load(icehockey_cvrnn_config_path)
    Prediction_MemoryBuffer.set_cache_memory(
        cache_number=icehockey_cvrnn_config.Arch.Predict.output_size)
    saved_network_dir, log_dir = get_model_and_log_name(
        config=icehockey_cvrnn_config,
        model_catagoery='cvrnn',
        running_number=running_number,
        date_msg='')

    source_data_dir = icehockey_cvrnn_config.Learn.save_mother_dir + '/oschulte/Galen/2018-2019/'  # you source data (before pre-preprocessing)
    data_store_dir = icehockey_cvrnn_config.Learn.save_mother_dir + '/oschulte/Galen/Ice-hockey-data/2018-2019/'  # your source data (after pre-preprocessing)

    dir_games_all = os.listdir(data_store_dir)
    # shuffle(dir_games_all)  # randomly shuffle the list
    # Floor division (`//`) equals the former Python 2 `/` on ints and keeps
    # the slice bounds valid on Python 3.
    game_count = len(dir_games_all)
    training_end = game_count // 5 * 4 - running_number * game_count // 5
    training_dir_games_all = dir_games_all[0:training_end]
    if running_number != 0:
        # A tail slice starting at -0 would select the whole list, so fold 0
        # gets no tail.
        training_dir_games_all = training_dir_games_all \
                                 + dir_games_all[-running_number * game_count // 5:]

    training_lookup = set(training_dir_games_all)  # O(1) membership tests
    test_validate_dir_games_all = [
        item for item in dir_games_all if item not in training_lookup
    ]

    half = len(test_validate_dir_games_all) // 2
    testing_dir_games_all = test_validate_dir_games_all[:half]
    validate_dir_games_all = test_validate_dir_games_all[half:]
    tmp_testing_dir_games_all = testing_dir_games_all[
        -10:]  # TODO: it is a small running testing, not the real one
    icehockey_cvrnn_config.Learn.number_of_total_game = game_count

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    sess = tf.Session()
    try:
        cvrnn = CVRNN(config=icehockey_cvrnn_config,
                      extra_prediction_flag=extra_prediction_flag,
                      deterministic_decoder=True)
        cvrnn()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            date_tag = datetime.date.today().strftime("%Y%B%d")
            recorded_splits = (
                ('training', training_dir_games_all),
                ('validate', validate_dir_games_all),
                ('testing', testing_dir_games_all),
            )
            for split_name, split_games in recorded_splits:
                record_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(split_name)
                if os.path.exists(record_path):
                    # Keep the previous record as a dated backup.
                    os.rename(
                        record_path,
                        saved_network_dir +
                        '/bak_{0}_file_dirs_all_{1}.csv'.format(split_name, date_tag))
                # Text mode: the entries are str, which a binary handle
                # rejects on Python 3.
                with open(record_path, 'w') as f:
                    f.writelines(game_dir + '\n' for game_dir in split_games)

        print('training the model.')
        with open(name_training_record_file, 'w') as training_file:
            run_network(sess=sess,
                        model=cvrnn,
                        config=icehockey_cvrnn_config,
                        log_dir=log_dir,
                        save_network_dir=saved_network_dir,
                        data_store=data_store_dir,
                        source_data_dir=source_data_dir,
                        training_dir_games_all=training_dir_games_all,
                        testing_dir_games_all=tmp_testing_dir_games_all,
                        player_id_cluster_dir=player_id_cluster_dir,
                        training_file=training_file)
    finally:
        sess.close()
def run():
    """Set up the deterministic-embedding model and hand it to ``run_network``.

    Steps, in order:

    1. Load ``ice_hockey_<predicted_target>_de.yaml`` through
       ``DEEmbedCongfig.load``.
    2. List the game directories in the data store and cut them into
       training / testing lists.
    3. Build a ``DeterministicEmbedding`` model inside a fresh TensorFlow
       session and initialise its variables.
    4. Unless ``local_test_flag`` is set, record the split as
       ``training_file_dirs_all.csv`` / ``testing_file_dirs_all.csv`` in the
       saved-network directory, renaming any existing list to ``bak_*`` first.
    5. Call ``run_network`` and close the session, even if a step failed.

    All switches (``validate_embedding_tag``, ``predicted_target``,
    ``local_test_flag``) are hard-coded locals; edit them here to change the
    run.

    NOTE: ``os.listdir`` returns names in arbitrary order and the list is not
    shuffled or sorted here, so the split depends on that order.
    """
    validate_embedding_tag = '1001'
    predicted_target = 'action'
    is_probability = predicted_target == 'action'
    de_config_path = "../environment_settings/ice_hockey_{0}_de.yaml".format(
        predicted_target)
    de_config = DEEmbedCongfig.load(de_config_path)

    local_test_flag = False
    if local_test_flag:
        # Local smoke test: every split is the whole sample directory.
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        saved_network_dir = None
    else:
        data_store_dir = de_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        # `//` keeps the slice bounds integral on Python 3 as well; on
        # Python 2 it is identical to the integer `/` used previously.
        training_dir_games_all = dir_games_all[0:len(dir_games_all) // 10 * 8]
        # Floor division of the negated length, so for a game count that is
        # not a multiple of 10 this takes ceil(len / 10) trailing games.
        testing_dir_games_all = dir_games_all[(-len(dir_games_all)) // 10:]  # TODO: testing
        saved_network_dir, log_dir = get_model_and_log_name(
            config=de_config,
            model_catagoery='de_embed',
            train_flag=False,
            embedding_tag=validate_embedding_tag)
    de_config.Learn.number_of_total_game = len(dir_games_all)

    sess = tf.Session()
    try:
        model = DeterministicEmbedding(config=de_config,
                                       is_probability=is_probability)
        model.build(validate_embedding_tag)
        model()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)

            training_list_path = saved_network_dir + '/training_file_dirs_all.csv'
            testing_list_path = saved_network_dir + '/testing_file_dirs_all.csv'

            # Keep the previous lists as backups before overwriting them.
            if os.path.exists(training_list_path):
                os.rename(training_list_path,
                          saved_network_dir + '/bak_training_file_dirs_all.csv')
            if os.path.exists(testing_list_path):
                os.rename(testing_list_path,
                          saved_network_dir + '/bak_testing_file_dirs_all.csv')

            # Text mode: the entries are str, which a binary-mode file
            # rejects on Python 3.
            with open(training_list_path, 'w') as f:
                for game_dir in training_dir_games_all:
                    f.write(game_dir + '\n')
            # NOTE(review): the saved testing list is cut at 9/10 of the
            # games, which differs from testing_dir_games_all above when the
            # game count is not a multiple of 10 -- confirm this is intended.
            with open(testing_list_path, 'w') as f:
                for game_dir in dir_games_all[len(dir_games_all) // 10 * 9:]:
                    f.write(game_dir + '\n')

        run_network(sess=sess,
                    model=model,
                    config=de_config,
                    training_dir_games_all=training_dir_games_all,
                    testing_dir_games_all=testing_dir_games_all,
                    data_store=data_store_dir,
                    predicted_target=predicted_target,
                    save_network_dir=saved_network_dir,
                    validate_embedding_tag=validate_embedding_tag)
    finally:
        # Release the session even when setup or run_network raises.
        sess.close()
def run():
    """Set up the TPPCVRNN model and hand it to ``run_network``.

    Steps, in order:

    1. Pick the player-id resource file and config-name suffix from the
       hard-coded ``player_id_type``.
    2. Load ``icehockey_cvrnn<suffix>_config.yaml`` through
       ``TPPCVRNNConfig.load`` and resolve the saved-network / log
       directories with ``get_model_and_log_name``.
    3. List the game directories in the data store and cut them into
       training / testing lists.
    4. Build the model inside a fresh TensorFlow session and initialise its
       variables.
    5. Unless ``local_test_flag`` is set, record the split as
       ``training_file_dirs_all.csv`` / ``testing_file_dirs_all.csv`` in the
       saved-network directory, renaming any existing list to ``bak_*`` first.
    6. Call ``run_network`` and close the session, even if a step failed.

    NOTE: ``os.listdir`` returns names in arbitrary order and the list is not
    shuffled or sorted here, so the split depends on that order.
    """
    local_test_flag = False
    player_id_type = 'local_id'
    if player_id_type == 'ap_cluster':
        player_id_cluster_dir = '../sport_resource/ice_hockey_201819/player_id_ap_cluster.json'
        predicted_target = '_PlayerPositionClusterAP'  # playerId_
    elif player_id_type == 'km_cluster':
        player_id_cluster_dir = '../sport_resource/ice_hockey_201819/player_id_km_cluster.json'
        predicted_target = '_PlayerPositionClusterKM'  # playerId_
    elif player_id_type == 'local_id':
        player_id_cluster_dir = '../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json'
        predicted_target = '_PlayerLocalId'  # playerId_
    else:
        player_id_cluster_dir = None
        predicted_target = ''

    icehockey_tpp_cvrnn_config_path = "../environment_settings/icehockey_cvrnn{0}_config.yaml".format(
        predicted_target)
    icehockey_cvrnn_config = TPPCVRNNConfig.load(
        icehockey_tpp_cvrnn_config_path)
    saved_network_dir, log_dir = get_model_and_log_name(
        config=icehockey_cvrnn_config, model_catagoery='tpp_cvrnn')

    if local_test_flag:
        # Local smoke test: every split is the whole sample directory.
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
    else:
        data_store_dir = icehockey_cvrnn_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        # `//` keeps the slice bound integral on Python 3 as well; on
        # Python 2 it is identical to the integer `/` used previously.
        training_dir_games_all = dir_games_all[0:len(dir_games_all) // 10 * 8]
        # Only the last ten games are evaluated for now.
        testing_dir_games_all = dir_games_all[-10:]  # TODO: testing
    icehockey_cvrnn_config.Learn.number_of_total_game = len(dir_games_all)

    sess = tf.Session()
    try:
        tpp_cvrnn = TPPCVRNN(config=icehockey_cvrnn_config)
        tpp_cvrnn()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)

            training_list_path = saved_network_dir + '/training_file_dirs_all.csv'
            testing_list_path = saved_network_dir + '/testing_file_dirs_all.csv'

            # Keep the previous lists as backups before overwriting them.
            if os.path.exists(training_list_path):
                os.rename(training_list_path,
                          saved_network_dir + '/bak_training_file_dirs_all.csv')
            if os.path.exists(testing_list_path):
                os.rename(testing_list_path,
                          saved_network_dir + '/bak_testing_file_dirs_all.csv')

            # Text mode: the entries are str, which a binary-mode file
            # rejects on Python 3.
            with open(training_list_path, 'w') as f:
                for game_dir in training_dir_games_all:
                    f.write(game_dir + '\n')
            # NOTE(review): the saved testing list is the last ~1/10 of the
            # games, not the ten-game testing_dir_games_all passed to
            # run_network below -- confirm this is intended.
            with open(testing_list_path, 'w') as f:
                for game_dir in dir_games_all[len(dir_games_all) // 10 * 9:]:
                    f.write(game_dir + '\n')

        run_network(sess=sess,
                    model=tpp_cvrnn,
                    config=icehockey_cvrnn_config,
                    log_dir=log_dir,
                    save_network_dir=saved_network_dir,
                    data_store=data_store_dir,
                    training_dir_games_all=training_dir_games_all,
                    testing_dir_games_all=testing_dir_games_all,
                    player_id_cluster_dir=player_id_cluster_dir)
    finally:
        # Release the session even when setup or run_network raises.
        sess.close()
Beispiel #11
0
    sess_nn = tf.InteractiveSession()
    model_nn = validate_model_initialization(sess_nn=sess_nn, model_category=model_category,
                                             config=icehockey_model_config)

    acc_all = []
    ll_all = []

    # running_numbers = [0,1,2,3,4]
    running_numbers = [0]

    with open('./results/player_id_acc_' + model_category + '_'
              + str(model_number) + player_info + msg_bounding+sparse_msg+'_q_cv', 'wb') as file_writer:

        for running_number in running_numbers:
            saved_network_dir, log_dir = get_model_and_log_name(config=icehockey_model_config,
                                                                model_catagoery=model_category,
                                                                running_number=running_number)

            testing_dir_games_all = []
            # with open('../../sport_resource/ice_hockey_201819/testing_file_dirs_all.csv', 'rb') as f:
            with open(saved_network_dir + '/testing_file_dirs_all.csv', 'rb') as f:
                testing_dir_all = f.readlines()
            for testing_dir in testing_dir_all:
                testing_dir_games_all.append(str(int(testing_dir)))
            model_data_store_dir = icehockey_model_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
            source_data_store = '/Local-Scratch/oschulte/Galen/2018-2019/'

            # data_name = get_data_name(icehockey_model_config, model_category, model_number)

            print(model_category + '_' + str(model_number) + player_info)
Beispiel #12
0
def run_calibration():
    """Run the score-difference calibration for one hard-coded model setup.

    Loads the config for ``model_type`` (``'cvrnn'`` or ``'lstm_diff'``),
    defines the calibration features and their bins, reads the testing game
    ids from ``testing_file_dirs_all.csv`` in the shared resource directory,
    and drives a ``Calibration`` object through ``construct_bin_dicts``,
    ``aggregate_calibration_values`` and ``compute_distance``. Finally prints
    the object's ``save_calibration_dir``.

    Raises:
        ValueError: if ``model_type`` is not one of the supported values.
    """
    model_type = 'cvrnn'
    player_info = '_box'
    apply_old = False
    apply_difference = True
    if model_type == 'cvrnn':
        model_number = 2101
        predicted_target = '_PlayerLocalId'
        icehockey_config_path = "../../environment_settings/icehockey_cvrnn{0}_config{1}.yaml".format(
            predicted_target, player_info)
        config = CVRNNCongfig.load(icehockey_config_path)

    elif model_type == 'lstm_diff':
        model_number = 2101
        icehockey_config_path = "../../environment_settings/ice_hockey_predict_score_diff_lstm{0}.yaml".format(
            player_info)
        config = LSTMQsCongfig.load(icehockey_config_path)
    else:
        raise ValueError('incorrect model type {0}'.format(model_type))

    calibration_features = [
        'period', 'scoreDifferential', 'zone', 'manpowerSituation', 'home_away'
    ]
    # NOTE(review): each 'feature_name' is a plain string, not a 1-tuple --
    # confirm that is what Calibration expects.
    calibration_bins = {
        'period': {
            'feature_name': 'period',
            'range': (1, 2, 3, 4),
        },
        'scoreDifferential': {
            'feature_name': 'scoreDifferential',
            # Materialised so the value is a list on Python 2 and Python 3.
            'range': list(range(-10, 10)),
        },
        'zone': {
            'feature_name': 'zone',
            'range': ('dz', 'nz', 'oz'),
        },
        'manpowerSituation': {
            'feature_name': 'manpowerSituation',
            'range': ('shortHanded', 'evenStrength', 'powerPlay'),
        },
        'home_away': {
            'feature_name': 'home_away',
            'range': (1, 0),
        },
        # TODO: we must add the home/away label
    }
    source_data_dir = '/Local-Scratch/oschulte/Galen/2018-2019/'
    model_data_store_dir = '/Local-Scratch/oschulte/Galen/Ice-hockey-data/2018-2019'

    # The returned directories are not used below; the call is kept because
    # it may have side effects -- TODO confirm and drop if it has none.
    saved_network_dir, log_dir = get_model_and_log_name(
        config=config, model_catagoery=model_type)

    # The testing list comes from the shared resource directory rather than
    # from saved_network_dir.
    with open(
            '../../sport_resource/ice_hockey_201819/' +
            '/testing_file_dirs_all.csv', 'rb') as f:
        testing_dir_all = f.readlines()
    # int() strips the trailing newline and normalises each id before the
    # play-sequence file suffix is appended.
    testing_dir_games_all = [
        str(int(testing_dir)) + '-playsequence-wpoi.json'
        for testing_dir in testing_dir_all
    ]

    Cali = Calibration(bins=calibration_bins,
                       source_data_dir=source_data_dir,
                       calibration_features=calibration_features,
                       config=config,
                       model_data_store_dir=model_data_store_dir,
                       apply_old=apply_old,
                       apply_difference=apply_difference,
                       model_type=model_type,
                       model_number=model_number,
                       player_info=player_info,
                       calibration_type='score_diff',
                       testing_dir_all=testing_dir_games_all,
                       focus_actions_list=[])
    Cali.construct_bin_dicts()
    Cali.aggregate_calibration_values()
    Cali.compute_distance()
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(Cali.save_calibration_dir)
def run():
    """Train or validate the ``Encoder_NN`` model for one cross-validation run.

    Steps, in order:

    1. Pick the player-id resource file and config-name pieces from the
       hard-coded switches, load the encoder config, size the prediction
       memory buffer, and resolve the saved-network / log directories for
       ``running_number``.
    2. Split the game directories. The training list is a leading slice plus,
       for ``running_number != 0``, a trailing slice. The games not in the
       training list are halved into testing (first half) and validation
       (second half). All bounds use floor division, so the fractions are
       approximate when the game count is not a multiple of 5.
    3. Build the model in a fresh TensorFlow session and initialise variables.
    4. If ``training``: record the three lists as CSV files in the
       saved-network directory (date-stamped backups of any existing lists
       are kept), then call ``run_network`` with the small temporary testing
       list.
       Otherwise: restore checkpoint ``model_number`` and call
       ``validate_model`` on the last ~1/10 of the games.
    5. Close the session, even if a step failed.

    NOTE: ``os.listdir`` returns names in arbitrary order and the list is not
    shuffled or sorted here, so the split depends on that order.
    """
    training = True
    running_number = 4
    local_test_flag = False
    player_id_type = 'local_id'
    rnn_type = '_lstm'
    predict_action = '_predict_next_goal'
    player_info = ''
    if player_id_type == 'ap_cluster':
        player_id_cluster_dir = '../sport_resource/ice_hockey_201819/player_id_ap_cluster.json'
        predicted_target = '_PlayerPositionClusterAP'  # playerId_
    elif player_id_type == 'km_cluster':
        player_id_cluster_dir = '../sport_resource/ice_hockey_201819/player_id_km_cluster.json'
        predicted_target = '_PlayerPositionClusterKM'  # playerId_
    elif player_id_type == 'local_id':
        player_id_cluster_dir = '../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json'
        predicted_target = '_PlayerLocalId'  # playerId_
    else:
        player_id_cluster_dir = None
        predicted_target = ''

    icehockey_encoder_config_path = "../environment_settings/" \
                                    "icehockey_stats{1}_encoder{0}{2}" \
                                    "_config{3}.yaml".format(predicted_target,
                                                             rnn_type,
                                                             predict_action,
                                                             player_info)
    icehockey_encoder_config = EncoderConfig.load(icehockey_encoder_config_path)
    Prediction_MemoryBuffer.set_cache_memory(cache_number=icehockey_encoder_config.Arch.Predict.output_node)
    saved_network_dir, log_dir = get_model_and_log_name(config=icehockey_encoder_config, model_catagoery='encoder',
                                                        running_number=running_number)

    if local_test_flag:
        # Local smoke test: every split is the whole sample directory.
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        validate_dir_games_all = os.listdir(data_store_dir)
        tmp_testing_dir_games_all = os.listdir(data_store_dir)
        source_data_dir = '/Users/liu/Desktop/Ice-hokcey-data-sample/data-sample/'
    else:
        source_data_dir = '/Local-Scratch/oschulte/Galen/2018-2019/'
        data_store_dir = icehockey_encoder_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        game_count = len(dir_games_all)
        # `//` keeps every bound integral on Python 3 as well; on Python 2 it
        # is identical to the integer `/` used previously (same precedence).
        head_end = game_count // 5 * 4 - running_number * game_count // 5
        training_dir_games_all = dir_games_all[0:head_end]
        if running_number != 0:
            # Later runs also take games from the end of the list.
            training_dir_games_all = (
                training_dir_games_all
                + dir_games_all[-running_number * game_count // 5:])
        # Set lookup instead of scanning the training list once per game.
        training_games = set(training_dir_games_all)
        test_validate_dir_games_all = [item for item in dir_games_all if item not in training_games]
        half = len(test_validate_dir_games_all) // 2
        testing_dir_games_all = test_validate_dir_games_all[:half]
        validate_dir_games_all = test_validate_dir_games_all[half:]
        tmp_testing_dir_games_all = testing_dir_games_all[-10:]  # TODO: it is a small running testing, not the real one
    icehockey_encoder_config.Learn.number_of_total_game = len(dir_games_all)

    sess = tf.Session()
    try:
        encoder = Encoder_NN(config=icehockey_encoder_config)
        encoder()
        sess.run(tf.global_variables_initializer())
        if training:
            if not local_test_flag:
                if not os.path.exists(saved_network_dir):
                    os.mkdir(saved_network_dir)
                # Record each split, keeping a date-stamped backup of any
                # list left by an earlier run.
                date_tag = datetime.date.today().strftime("%Y%B%d")
                splits = (
                    ('training', training_dir_games_all),
                    ('testing', testing_dir_games_all),
                    ('validate', validate_dir_games_all),
                )
                for split_name, _ in splits:
                    list_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(split_name)
                    if os.path.exists(list_path):
                        os.rename(list_path,
                                  saved_network_dir + '/bak_{0}_file_dirs_all_{1}.csv'.format(split_name, date_tag))
                for split_name, game_dirs in splits:
                    list_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(split_name)
                    # Text mode: the entries are str, which a binary-mode
                    # file rejects on Python 3.
                    with open(list_path, 'w') as f:
                        for game_dir in game_dirs:
                            f.write(game_dir + '\n')
            print('training the model.')
            run_network(sess=sess, model=encoder, config=icehockey_encoder_config, log_dir=log_dir,
                        save_network_dir=saved_network_dir, data_store=data_store_dir, source_data_dir=source_data_dir,
                        training_dir_games_all=training_dir_games_all, testing_dir_games_all=tmp_testing_dir_games_all,
                        player_id_cluster_dir=player_id_cluster_dir)
        else:
            print('testing the model')
            model_number = 9301
            # Validation ignores the split above and uses the last ~1/10.
            testing_dir_games_all = dir_games_all[len(dir_games_all) // 10 * 9:]
            saver = tf.train.Saver()
            # NOTE(review): the double hyphen presumably matches a checkpoint
            # prefix ending in '-game-' plus the step suffix -- confirm
            # against the code that saves the checkpoints.
            model_path = saved_network_dir + '/' + icehockey_encoder_config.Learn.data_name + '-game--{0}'.format(
                str(model_number))
            saver.restore(sess, model_path)
            # Parenthesised single-argument form prints the same on
            # Python 2 and 3.
            print('successfully load data from' + model_path)

            validate_model(testing_dir_games_all,
                           data_store=data_store_dir,
                           source_data_dir=source_data_dir,
                           config=icehockey_encoder_config,
                           sess=sess,
                           model=encoder,
                           player_id_cluster_dir=player_id_cluster_dir,
                           train_game_number=None,
                           validate_cvrnn_flag=True,
                           validate_td_flag=True,
                           validate_diff_flag=True,
                           validate_predict_flag=True)
    finally:
        # Release the session even when setup, training or validation raises.
        sess.close()