def run():
    """Train the mixture-density Q-value network on the ice-hockey games.

    Loads the MDN config, splits the game directories found in the data store
    into a training and a testing list, records the split next to the saved
    network (any previous record is renamed to a dated ``bak_`` file first)
    and passes everything to ``run_network``.
    """
    local_test_flag = False
    icehockey_mdn_Qs_config_path = "../environment_settings/ice_hockey_predict_Qs_mdn.yaml"
    icehockey_mdn_Qs_config = MDNQsCongfig.load(icehockey_mdn_Qs_config_path)
    saved_network_dir, log_dir = get_model_and_log_name(
        config=icehockey_mdn_Qs_config, model_catagoery='mdn_Qs')

    if local_test_flag:
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        save_flag = False
    else:
        data_store_dir = icehockey_mdn_Qs_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        # Floor division keeps the slice bound an int on Python 2 and 3 alike.
        training_dir_games_all = dir_games_all[0:len(dir_games_all) // 10 * 9]
        # testing_dir_games_all = dir_games_all[len(dir_games_all)/10*9:]
        testing_dir_games_all = dir_games_all[-10:]  # TODO: testing
        save_flag = True

    number_of_total_game = len(dir_games_all)
    icehockey_mdn_Qs_config.Learn.number_of_total_game = number_of_total_game

    sess = tf.Session()
    try:
        model = MixtureDensityNN(config=icehockey_mdn_Qs_config)
        model()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            date_tag = datetime.date.today().strftime("%Y%B%d")
            # Record the games that are really trained on (first 9/10), so the
            # saved list cannot silently omit games the model has seen. The
            # recorded test list stays the last-tenth slice.
            recorded_lists = (
                ('training', training_dir_games_all),
                ('testing', dir_games_all[len(dir_games_all) // 10 * 9:]),
            )
            for list_name, game_dirs in recorded_lists:
                list_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name)
                # Keep the previous record as a dated backup.
                if os.path.exists(list_path):
                    os.rename(list_path,
                              saved_network_dir + '/bak_{0}_file_dirs_all_{1}.csv'.format(list_name, date_tag))
                # Text mode: the payload is str, which 'wb' rejects on Python 3.
                with open(list_path, 'w') as f:
                    for game_dir in game_dirs:
                        f.write(game_dir + '\n')

        run_network(sess=sess, model=model, config=icehockey_mdn_Qs_config,
                    log_dir=log_dir, save_network_dir=saved_network_dir,
                    data_store=data_store_dir,
                    training_dir_games_all=training_dir_games_all,
                    testing_dir_games_all=testing_dir_games_all,
                    player_id_cluster_dir=None, save_flag=save_flag)
    finally:
        # Release the TensorFlow session even when training fails.
        sess.close()
def combine_de_embeddings():
    """Join the stored 'state', 'action' and 'reward' embedding matrices.

    For each target the matching DE config is loaded, the embedding directory
    is derived from the saved-network directory name, and the CSV matrix of
    game 901 is read. The matrices are concatenated along axis 1, in target
    order, and the combined array is returned.
    """
    combined_embeddings = None
    for target_name in ('state', 'action', 'reward'):
        config_path = "../environment_settings/ice_hockey_{0}_de.yaml".format(target_name)
        target_config = DEEmbedCongfig.load(config_path)
        save_network_dir, log_dir = get_model_and_log_name(config=target_config,
                                                           model_catagoery='de_embed',
                                                           train_flag=False)
        # The embeddings live in a sibling tree of the saved networks.
        embed_dir = save_network_dir.replace('de_embed_saved_networks', 'store_embedding')
        embed_dir = embed_dir.replace('de_model_saved_NN', 'de_model_save_embedding')

        matrix_path = embed_dir + '/embedding_matrix_game{0}.csv'.format(str(901))
        with open(matrix_path, 'r') as matrix_file:
            matrix_rows = list(csv.reader(matrix_file))

        if combined_embeddings is not None:
            combined_embeddings = np.concatenate([combined_embeddings, matrix_rows], axis=1)
        else:
            combined_embeddings = np.asarray(matrix_rows)
    return combined_embeddings
[model.mu_out, model.var_out], feed_dict={ model.rnn_input_ph: input_data, model.trace_lengths_ph: state_trace_length }) mu_all.append(mu) var_all.append(var) return mu_all, var_all if __name__ == '__main__': test_flag = True ci_value = 1.96 icehockey_mdn_Qs_config_path = "../environment_settings/ice_hockey_predict_Qs_mdn.yaml" icehockey_mdn_Qs_config = MDNQsCongfig.load(icehockey_mdn_Qs_config_path) saved_network_dir, log_dir = get_model_and_log_name( config=icehockey_mdn_Qs_config, model_catagoery='mdn_Qs') if test_flag: data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample" dir_games_all = os.listdir(data_store_dir) training_dir_games_all = os.listdir(data_store_dir) testing_dir_games_all = os.listdir(data_store_dir) save_flag = False else: data_store_dir = icehockey_mdn_Qs_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/" dir_games_all = os.listdir(data_store_dir) training_dir_games_all = dir_games_all[0:len(dir_games_all) / 10 * 9] # testing_dir_games_all = dir_games_all[len(dir_games_all)/10*9:] testing_dir_games_all = dir_games_all[-10:] # TODO: testing save_flag = True number_of_total_game = len(dir_games_all)
def run():
    """Train the LSTM Q-value model on one cross-validation fold.

    ``running_number`` selects the fold. The game directories are split into
    training, validation and testing lists, the three lists are written next
    to the saved network (previous records are renamed to dated ``bak_``
    files) and training is started through ``run_network`` with a 10-game
    subset of the test list.
    """
    running_number = 2
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    local_test_flag = False
    input_type = '_pid'
    icehockey_mdn_Qs_config_path = "../environment_settings/ice_hockey_predict_Qs_lstm{0}.yaml".format(input_type)
    icehockey_mdn_Qs_config = LSTMQsCongfig.load(icehockey_mdn_Qs_config_path)
    saved_network_dir, log_dir = get_model_and_log_name(config=icehockey_mdn_Qs_config,
                                                        model_catagoery='lstm_Qs',
                                                        running_number=running_number)

    if local_test_flag:
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        validate_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        tmp_testing_dir_games_all = os.listdir(data_store_dir)
        save_flag = False
    else:
        data_store_dir = icehockey_mdn_Qs_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        save_flag = True
        n_games = len(dir_games_all)
        # Floor division keeps the bounds ints on Python 2 and 3 alike. The
        # prefix bound is clamped at 0: for running_number 4 and a game count
        # that is not a multiple of 5 it would go negative and the "prefix"
        # slice would swallow almost every game.
        train_end = max(0, n_games // 5 * 4 - running_number * n_games // 5)
        training_dir_games_all = dir_games_all[0:train_end]
        if running_number != 0:
            training_dir_games_all = training_dir_games_all \
                + dir_games_all[-running_number * n_games // 5:]
        # Set lookup avoids a quadratic scan over the training list.
        training_game_set = set(training_dir_games_all)
        test_validate_dir_games_all = [item for item in dir_games_all
                                       if item not in training_game_set]
        half = len(test_validate_dir_games_all) // 2
        testing_dir_games_all = test_validate_dir_games_all[:half]
        validate_dir_games_all = test_validate_dir_games_all[half:]
        tmp_testing_dir_games_all = testing_dir_games_all[-10:]  # TODO: it is a small running testing, not the real one

    # data_store_dir = '/cs/oschulte/xiangyus/2019-icehockey-data-preprocessed/2018-2019'
    # saved_network_dir = '/Local-Scratch/oschulte/Galen/icehockey-models/lstm_Qs_model_saved_NN/lstm_saved_networks_featurev1_Qs_batch32_iterate10_lr1e-05_v1_MaxTL10_LSTM512_dense256'
    number_of_total_game = len(dir_games_all)
    icehockey_mdn_Qs_config.Learn.number_of_total_game = number_of_total_game

    sess = tf.Session()
    try:
        model = TD_Prediction(config=icehockey_mdn_Qs_config)
        model()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            date_tag = datetime.date.today().strftime("%Y%B%d")
            recorded_lists = (
                ('training', training_dir_games_all),
                ('testing', testing_dir_games_all),
                ('validate', validate_dir_games_all),
            )
            # Move every existing record to a dated backup before rewriting.
            for list_name, _ in recorded_lists:
                list_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name)
                if os.path.exists(list_path):
                    os.rename(list_path,
                              saved_network_dir + '/bak_{0}_file_dirs_all_{1}.csv'.format(list_name, date_tag))
            for list_name, game_dirs in recorded_lists:
                # Text mode: the payload is str, which 'wb' rejects on Python 3.
                with open(saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name), 'w') as f:
                    for game_dir in game_dirs:
                        f.write(game_dir + '\n')

        run_network(sess=sess, model=model, config=icehockey_mdn_Qs_config,
                    log_dir=log_dir, save_network_dir=saved_network_dir,
                    data_store=data_store_dir,
                    training_dir_games_all=training_dir_games_all,
                    testing_dir_games_all=tmp_testing_dir_games_all,
                    player_id_cluster_dir=None, save_flag=save_flag)
    finally:
        # Release the TensorFlow session even when training fails.
        sess.close()
def validate_score_diff(model_data_store_dir,
                        data_name,
                        source_data_dir,
                        data_store,
                        model_category,
                        file_writer=None,
                        cv_number=None):
    """Compare predicted final score differences with the real ones.

    For every cross-validation run in ``range(0, cv_number)`` the first 20
    games of that run's ``testing_file_dirs_all.csv`` are evaluated. Per game
    and per event the function stores the real label (the last value of the
    game's score differential), the model output and the game time in arrays
    padded with ``-100``. Absolute differences between output and real label
    are then aggregated per event index and per game-time second.

    When ``data_name`` is None no model predictions are loaded: the output is
    the current score differential itself and the game time is spread evenly
    over 3600 seconds.

    Args:
        model_data_store_dir: forwarded to ``obtain_model_predictions``.
        data_name: name of the stored model output, or None (see above).
        source_data_dir: passed as ``data_store=`` to
            ``read_feature_within_events``.
        data_store: passed as the second positional argument to
            ``read_feature_within_events``.
        model_category: model category used to locate the saved network
            directory and printed in the summary line.
        file_writer: optional object with ``write``; receives one line per
            included event.
        cv_number: number of cross-validation runs to aggregate.

    Returns:
        A 6-tuple: mean diff per included event (array), variance per included
        event (array), ``range`` over the included events, mean diff per game
        second 0..3600 (array), variance per game second (array) and the list
        of game seconds ``[0, ..., 3600]``.

    NOTE(review): ``icehockey_model_config`` is not a parameter; it is read
    from an outer scope. The default ``cv_number=None`` makes
    ``range(0, cv_number)`` raise TypeError, so callers have to pass it.
    """
    length_max = 5000  # width of the padded per-game records
    length_min = 5000  # not used below
    real_label_record_all = None
    output_label_record_all = None
    game_time_record_all = None
    for running_number in range(0, cv_number):
        saved_network_dir, log_dir = get_model_and_log_name(
            config=icehockey_model_config,
            model_catagoery=model_category,
            running_number=running_number)
        testing_dir_games_all = []
        # with open('../../sport_resource/ice_hockey_201819/' + '/testing_file_dirs_all_v2.csv', 'rb') as f:
        with open(saved_network_dir + '/testing_file_dirs_all.csv', 'rb') as f:
            testing_dir_all = f.readlines()
        for testing_dir in testing_dir_all:
            # int() drops the trailing newline of each recorded game id.
            testing_dir_games_all.append(str(int(testing_dir)))
        # Only the first 20 recorded test games are evaluated per run.
        testing_dir_games_all = testing_dir_games_all[:20]
        # -100 marks "no event at this index" in all three records.
        real_label_record = np.ones([len(testing_dir_games_all), length_max
                                     ]) * -100
        output_label_record = np.ones([len(testing_dir_games_all), length_max
                                       ]) * -100
        game_time_record = np.ones([len(testing_dir_games_all), length_max
                                    ]) * -100
        for dir_index in range(0, len(testing_dir_games_all)):
            print('Processing game {0}'.format(dir_index))
            testing_dir = testing_dir_games_all[dir_index]
            if data_name is not None:
                model_values = obtain_model_predictions(
                    model_data_store_dir, testing_dir, data_name,
                    running_number)
            score_difference_game = read_feature_within_events(
                testing_dir,
                data_store,
                'scoreDifferential',
                transfer_home_number=True,
                data_store=source_data_dir,
                allow_overtime=False)
            game_time_list = read_feature_within_events(
                testing_dir,
                data_store,
                'gameTime',
                transfer_home_number=False,
                data_store=source_data_dir,
                allow_overtime=False)
            if data_name is None:
                # Baseline: the prediction is the current score differential.
                output_label_all = np.asarray(
                    len(score_difference_game) * [0]) + score_difference_game
                real_label_all = [score_difference_game[-1]
                                  ] * len(score_difference_game)
                # The game time read above is replaced by an even spread of
                # the events over 3600 seconds.
                game_time_list = []
                for j in range(
                        0, len(score_difference_game)
                ):  # TODO: how to map to the time under cross-validation?
                    game_time_list.append(
                        float(3600) / len(score_difference_game) * j)
            else:
                real_label_all = [score_difference_game[-1]
                                  ] * len(score_difference_game)
                # Column 0 minus column 1 of the model output, added to the
                # current score differential.
                output_label_all = model_values[:len(score_difference_game), 0] - \
                    model_values[:len(score_difference_game), 1] + score_difference_game[
                        :len(score_difference_game)]
            real_label_record[dir_index][:len(real_label_all)] = real_label_all
            output_label_record[dir_index][:len(output_label_all
                                                )] = output_label_all
            game_time_record[dir_index][:len(game_time_list)] = game_time_list
        # Stack the records of this run under those of the previous runs.
        if real_label_record_all is None:
            real_label_record_all = real_label_record
            output_label_record_all = output_label_record
            game_time_record_all = game_time_record
        else:
            real_label_record_all = np.concatenate(
                [real_label_record_all, real_label_record], axis=0)
            output_label_record_all = np.concatenate(
                [output_label_record_all, output_label_record], axis=0)
            game_time_record_all = np.concatenate(
                [game_time_record_all, game_time_record], axis=0)
    acc_diff_mean_by_event = []
    acc_diff_var_by_event = []
    acc_global = []
    game_time_diff_record_list = []
    game_time_list = []
    include_number = 0  # number of event indices kept so far
    # One bucket of diffs per game second 0..3600.
    for i in range(0, 3601):
        game_time_diff_record_list.append([])
        game_time_list.append(i)
    for i in range(0, length_max):
        real_outcome_record_step = real_label_record_all[:, i]
        model_output_record_step = output_label_record_all[:, i]
        game_time_record_step = game_time_record_all[:, i]
        diff_list = []
        total_number = 0
        print_flag = True
        check_flag = False  # set when at least one game has no value at index i
        include_flag = False  # set when at least one game has a value at index i
        for win_index in range(0, len(real_outcome_record_step)):
            if model_output_record_step[win_index] == -100 or \
                    real_outcome_record_step[win_index] == -100 or \
                    game_time_record_step[win_index] == -100:
                check_flag = True
                # include_flag = False
                continue
            else:
                include_flag = True
            diff = abs(model_output_record_step[win_index] -
                       real_outcome_record_step[win_index])
            # The diff is filed under the truncated game second of the event.
            game_time_index = int(game_time_record_step[win_index])
            game_time_diff_record_list[game_time_index].append(diff)
            diff_list.append(diff)
            acc_global.append(diff)
            total_number += 1
        if check_flag:
            # Some game is missing at this index: the event is kept only if at
            # least one of the collected diffs is below 0.2.
            diff_list_new = []
            for diff in diff_list:
                if diff < 0.2:
                    diff_list_new.append(diff)
            if len(diff_list_new) == 0:
                include_flag = False
        if include_flag:
            acc_diff_mean_by_event.append(np.mean(np.asarray(diff_list)))
            acc_diff_var_by_event.append(np.var(np.asarray(diff_list)))
            if file_writer is not None:
                file_writer.write('diff of event {0} is {1}\n'.format(
                    str(include_number),
                    str(acc_diff_mean_by_event[include_number])))
            if print_flag:
                if include_number % 100 == 0:
                    print('diff of event {0} is {1}'.format(
                        str(include_number),
                        str(acc_diff_mean_by_event[include_number])))
            include_number += 1
        else:
            continue
            # event_numbers.append(i)
    acc_diff_mean_by_time = []
    acc_diff_var_by_time = []
    for i in range(0, 3601):
        game_time_diff_list = game_time_diff_record_list[i]
        # NOTE(review): seconds without any event give an empty list here, so
        # np.mean / np.var presumably yield nan for them -- confirm that the
        # consumers of the by-time arrays expect that.
        acc_diff_mean_by_time.append(np.mean(np.asarray(game_time_diff_list)))
        acc_diff_var_by_time.append(np.var(np.asarray(game_time_diff_list)))
        if i % 100 == 0:
            print('diff of time {0} is {1}'.format(
                str(i), str(acc_diff_mean_by_time[i])))
    print('diff of {0} has the mean {1} and variance {2}.'.format(
        model_category, str(np.mean(np.asarray(acc_global))),
        str(np.var(np.asarray(acc_global)))))
    return np.asarray(acc_diff_mean_by_event), np.asarray(acc_diff_var_by_event), \
        range(len(acc_diff_mean_by_event)), \
        np.asarray(acc_diff_mean_by_time), np.asarray(acc_diff_var_by_time), game_time_list
def run():
    """Train the LSTM prediction model on one cross-validation fold.

    ``prediction_type`` selects the predicted target and the optional
    player-id cluster file; ``running_number`` selects the fold. The game
    directories are split into training, validation and testing lists, the
    lists are written next to the saved network (previous records are renamed
    to dated ``bak_`` files) and training is started through ``run_network``
    with a 10-game subset of the test list.

    Raises:
        ValueError: if ``prediction_type`` is not one of the known types.
    """
    play_info = '_pid'
    running_number = 2
    prediction_type = 'action_goal'

    # prediction type -> (player id cluster file, predicted target name)
    target_settings = {
        'ap_playerId': ('../sport_resource/ice_hockey_201819/player_id_ap_cluster.json',
                        'PlayerPositionClusterAP'),
        'km_playerId': ('../sport_resource/ice_hockey_201819/player_id_km_cluster.json',
                        'PlayerPositionClusterKM'),
        'pos_playerId': (None, 'playerposition'),
        'pids': ('../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json',
                 'PlayerLocalId'),
        'action_goal': (None, 'ActionGoal'),
        'action': (None, 'Action'),
    }
    if prediction_type not in target_settings:
        raise ValueError('unknown type')
    player_id_cluster_dir, predicted_target = target_settings[prediction_type]

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    BalancedMemoryBuffer.set_cache_memory(cache_number=2)
    tt_lstm_config_path = "../environment_settings/ice_hockey_{0}_prediction{1}.yaml".format(
        predicted_target, play_info)
    lstm_prediction_config = LSTMPredictConfig.load(tt_lstm_config_path)
    local_test_flag = False
    saved_network_dir, log_dir = get_model_and_log_name(
        config=lstm_prediction_config,
        model_catagoery='lstm_prediction',
        running_number=running_number)

    if local_test_flag:
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        source_data_store_dir = '/Users/liu/Desktop/Ice-hokcey-data-sample/data-sample/'
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        validate_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        # Needed by the run_network call below; without it a local test run
        # stops with a NameError.
        tmp_testing_dir_games_all = os.listdir(data_store_dir)
    else:
        data_store_dir = lstm_prediction_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        source_data_store_dir = lstm_prediction_config.Learn.save_mother_dir + '/oschulte/Galen/2018-2019/'
        dir_games_all = os.listdir(data_store_dir)
        n_games = len(dir_games_all)
        # Floor division keeps the bounds ints on Python 2 and 3 alike. The
        # prefix bound is clamped at 0: for running_number 4 and a game count
        # that is not a multiple of 5 it would go negative and the "prefix"
        # slice would swallow almost every game.
        train_end = max(0, n_games // 5 * 4 - running_number * n_games // 5)
        training_dir_games_all = dir_games_all[0:train_end]
        if running_number != 0:
            training_dir_games_all = training_dir_games_all \
                + dir_games_all[-running_number * n_games // 5:]
        # Set lookup avoids a quadratic scan over the training list.
        training_game_set = set(training_dir_games_all)
        test_validate_dir_games_all = [
            item for item in dir_games_all if item not in training_game_set
        ]
        half = len(test_validate_dir_games_all) // 2
        testing_dir_games_all = test_validate_dir_games_all[:half]
        validate_dir_games_all = test_validate_dir_games_all[half:]
        tmp_testing_dir_games_all = testing_dir_games_all[
            -10:]  # TODO: it is a small running testing, not the real one

    number_of_total_game = len(dir_games_all)
    lstm_prediction_config.Learn.number_of_total_game = number_of_total_game

    sess = tf.Session()
    try:
        model = Td_Prediction_NN(config=lstm_prediction_config)
        model.initialize_ph()
        model.build()
        model.call()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            date_tag = datetime.date.today().strftime("%Y%B%d")
            recorded_lists = (
                ('training', training_dir_games_all),
                ('testing', testing_dir_games_all),
                ('validate', validate_dir_games_all),
            )
            # Move every existing record to a dated backup before rewriting.
            for list_name, _ in recorded_lists:
                list_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name)
                if os.path.exists(list_path):
                    os.rename(list_path,
                              saved_network_dir + '/bak_{0}_file_dirs_all_{1}.csv'.format(list_name, date_tag))
            for list_name, game_dirs in recorded_lists:
                # Text mode: the payload is str, which 'wb' rejects on Python 3.
                with open(saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name), 'w') as f:
                    for game_dir in game_dirs:
                        f.write(game_dir + '\n')

        run_network(sess=sess,
                    model=model,
                    config=lstm_prediction_config,
                    training_dir_games_all=training_dir_games_all,
                    testing_dir_games_all=tmp_testing_dir_games_all,
                    model_data_store_dir=data_store_dir,
                    player_id_cluster_dir=player_id_cluster_dir,
                    source_data_store_dir=source_data_store_dir,
                    save_network_dir=saved_network_dir)
    finally:
        # Release the TensorFlow session even when training fails.
        sess.close()
def run_calibration():
    """Run the next-goal calibration over all cross-validation folds.

    Loads the config that belongs to ``model_type``, builds a ``Calibration``
    object over the period / score differential / zone / manpower / home-away
    bins, feeds it the recorded test games of every fold in
    ``running_numbers`` and finally computes the distance and prints the
    directory the calibration was saved to.

    Raises:
        ValueError: if ``model_type`` is not one of the handled model types.
    """
    model_type = 'lstm_Qs'
    player_info = '_pid'
    apply_old = False
    apply_difference = False
    running_numbers = [0, 1, 2, 3, 4]

    if model_type == 'cvrnn':
        model_number = 601
        embed_mode = '_embed_random'
        predicted_target = '_PlayerLocalId_predict_nex_goal'
        icehockey_config_path = "../../environment_settings/icehockey_cvrnn{0}_config{1}{2}.yaml" \
            .format(predicted_target, player_info, embed_mode)
        config = CVRNNCongfig.load(icehockey_config_path)
    elif model_type == 'cvae':
        model_number = 601
        embed_mode = ''
        predicted_target = '_PlayerLocalId_predict_next_goal'  # playerId_
        icehockey_config_path = "../../environment_settings/icehockey_cvae_lstm{0}_config{1}.yaml".format(
            predicted_target, player_info)
        config = CVAECongfig.load(icehockey_config_path)
    elif model_type == 'vhe':
        model_number = 601
        embed_mode = ''
        predicted_target = '_PlayerLocalId_predict_next_goal'  # playerId_
        icehockey_config_path = "../../environment_settings/icehockey_vhe_lstm{0}_config{1}.yaml".format(
            predicted_target, player_info)
        config = CVAECongfig.load(icehockey_config_path)
    elif model_type == 'encoder':
        model_number = 901
        embed_mode = ''
        predicted_target = '_PlayerLocalId_predict_next_goal'
        # player_id_cluster_dir = '../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json'
        icehockey_encoder_config_path = "../../environment_settings/icehockey_stats_lstm_encoder{0}_config{1}.yaml" \
            .format(predicted_target, player_info)
        config = EncoderConfig.load(icehockey_encoder_config_path)
    elif model_type == 'lstm_Qs':
        embed_mode = ''
        # 'model_2101_three_cut_lstm_next_Qs_featurev1_next_Qs_batch32_iterate10_lr1e-05_v1_MaxTL10_LSTM512_dense256'
        model_number = 901
        icehockey_config_path = "../../environment_settings/ice_hockey_predict_Qs_lstm{0}.yaml".format(
            player_info)
        config = LSTMQsCongfig.load(icehockey_config_path)
    else:
        raise ValueError('incorrect model type {0}'.format(model_type))

    calibration_features = ['period', 'scoreDifferential', 'zone', 'manpowerSituation', 'home_away']
    calibration_bins = {
        'period': {'feature_name': 'period', 'range': (1, 2, 3, 4)},
        'scoreDifferential': {'feature_name': 'scoreDifferential', 'range': range(-10, 10)},
        'zone': {'feature_name': 'zone', 'range': ('dz', 'nz', 'oz')},
        'manpowerSituation': {'feature_name': 'manpowerSituation',
                              'range': ('shortHanded', 'evenStrength', 'powerPlay')},
        'home_away': {'feature_name': 'home_away', 'range': (1, 0)},  # TODO: we must add the home/away label
    }

    source_data_dir = '/Local-Scratch/oschulte/Galen/2018-2019/'
    model_data_store_dir = '/Local-Scratch/oschulte/Galen/Ice-hockey-data/2018-2019'
    Cali = Calibration(bins=calibration_bins,
                       source_data_dir=source_data_dir,
                       calibration_features=calibration_features,
                       config=config,
                       model_data_store_dir=model_data_store_dir,
                       apply_old=apply_old,
                       apply_difference=apply_difference,
                       model_type=model_type,
                       model_number=model_number,
                       player_info=player_info,
                       calibration_type='next_goal',
                       # testing_dir_all=testing_dir_games_all[:1],
                       embed_mode=embed_mode,
                       focus_actions_list=[],
                       if_apply_cv=True,
                       running_numbers=running_numbers)
    Cali.construct_bin_dicts()

    for running_number in running_numbers:
        saved_network_dir, log_dir = get_model_and_log_name(config=config,
                                                            model_catagoery=model_type,
                                                            running_number=running_number)
        with open(saved_network_dir + '/testing_file_dirs_all.csv', 'rb') as f:
            # with open('../../sport_resource/ice_hockey_201819/' + '/testing_file_dirs_all.csv', 'rb') as f:
            testing_dir_all = f.readlines()
        # int() drops the trailing newline of each recorded game id.
        testing_dir_games_all = [str(int(testing_dir)) + '-playsequence-wpoi.json'
                                 for testing_dir in testing_dir_all]
        Cali.aggregate_calibration_values(testing_dir_games_all=testing_dir_games_all,
                                          running_number=running_number)

    Cali.compute_distance()
    # Parenthesised form prints the same on Python 2 and parses on Python 3.
    print(Cali.save_calibration_dir)
def run():
    """Train the CVRNN model on one cross-validation fold.

    ``running_number`` (0..4) selects the fold of the 5-fold split. The game
    directories are split into training, validation and testing lists, the
    lists are written next to the saved network (previous records are renamed
    to dated ``bak_`` files) and training is started through ``run_network``
    with a 10-game subset of the test list. The open training-record file is
    handed to ``run_network`` as ``training_file``.
    """
    local_test_flag = False
    box_msg = ''
    predict_action = '_predict_nex_goal'
    embed_mode = '_embed_random'
    running_number = 0  # running_number is [0, 1, 2, 3, 4] for 5-fold cross validation
    extra_prediction_flag = len(predict_action) > 0

    player_id_cluster_dir = '../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json'
    predicted_target = '_PlayerLocalId'  # playerId_
    icehockey_cvrnn_config_path = "../environment_settings/icehockey_cvrnn{0}{2}_config{1}{3}.yaml".format(
        predicted_target, box_msg, predict_action, embed_mode)
    # datetime.now() is used because the name carries an hour field; a plain
    # date has no time part, so '%H' on it is always '00'.
    name_training_record_file = "record_training_cvrnn{0}{2}_config{1}{3}{4}.yaml".format(
        predicted_target, box_msg, predict_action, embed_mode,
        datetime.datetime.now().strftime('%Y-%m-%d-%H'))
    icehockey_cvrnn_config = CVRNNCongfig.load(icehockey_cvrnn_config_path)
    Prediction_MemoryBuffer.set_cache_memory(
        cache_number=icehockey_cvrnn_config.Arch.Predict.output_size)
    saved_network_dir, log_dir = get_model_and_log_name(
        config=icehockey_cvrnn_config,
        model_catagoery='cvrnn',
        running_number=running_number,
        date_msg='')

    # source data before pre-processing
    source_data_dir = icehockey_cvrnn_config.Learn.save_mother_dir + '/oschulte/Galen/2018-2019/'
    # source data after pre-processing
    data_store_dir = icehockey_cvrnn_config.Learn.save_mother_dir + '/oschulte/Galen/Ice-hockey-data/2018-2019/'
    dir_games_all = os.listdir(data_store_dir)
    # shuffle(dir_games_all)  # randomly shuffle the list
    n_games = len(dir_games_all)
    # Floor division keeps the bounds ints on Python 2 and 3 alike. The prefix
    # bound is clamped at 0: for running_number 4 and a game count that is not
    # a multiple of 5 it would go negative and the "prefix" slice would
    # swallow almost every game.
    train_end = max(0, n_games // 5 * 4 - running_number * n_games // 5)
    training_dir_games_all = dir_games_all[0:train_end]
    if running_number != 0:
        training_dir_games_all = training_dir_games_all \
            + dir_games_all[-running_number * n_games // 5:]
    # Set lookup avoids a quadratic scan over the training list.
    training_game_set = set(training_dir_games_all)
    test_validate_dir_games_all = [
        item for item in dir_games_all if item not in training_game_set
    ]
    half = len(test_validate_dir_games_all) // 2
    testing_dir_games_all = test_validate_dir_games_all[:half]
    validate_dir_games_all = test_validate_dir_games_all[half:]
    tmp_testing_dir_games_all = testing_dir_games_all[
        -10:]  # TODO: it is a small running testing, not the real one
    number_of_total_game = len(dir_games_all)
    icehockey_cvrnn_config.Learn.number_of_total_game = number_of_total_game

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    sess = tf.Session()
    try:
        cvrnn = CVRNN(config=icehockey_cvrnn_config,
                      extra_prediction_flag=extra_prediction_flag,
                      deterministic_decoder=True)
        cvrnn()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            date_tag = datetime.date.today().strftime("%Y%B%d")
            recorded_lists = (
                ('training', training_dir_games_all),
                ('testing', testing_dir_games_all),
                ('validate', validate_dir_games_all),
            )
            # Move every existing record to a dated backup before rewriting.
            for list_name, _ in recorded_lists:
                list_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name)
                if os.path.exists(list_path):
                    os.rename(list_path,
                              saved_network_dir + '/bak_{0}_file_dirs_all_{1}.csv'.format(list_name, date_tag))
            for list_name, game_dirs in recorded_lists:
                # Text mode: the payload is str, which 'wb' rejects on Python 3.
                with open(saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name), 'w') as f:
                    for game_dir in game_dirs:
                        f.write(game_dir + '\n')

        print('training the model.')
        with open(name_training_record_file, 'w') as training_file:
            run_network(sess=sess,
                        model=cvrnn,
                        config=icehockey_cvrnn_config,
                        log_dir=log_dir,
                        save_network_dir=saved_network_dir,
                        data_store=data_store_dir,
                        source_data_dir=source_data_dir,
                        training_dir_games_all=training_dir_games_all,
                        testing_dir_games_all=tmp_testing_dir_games_all,
                        player_id_cluster_dir=player_id_cluster_dir,
                        training_file=training_file)
    finally:
        # Release the TensorFlow session even when training fails.
        sess.close()
def run():
    """Train the deterministic embedding model for one predicted target.

    Loads the DE config of ``predicted_target``, splits the game directories
    into training and testing lists, records the split next to the saved
    network (a previous record is renamed to a ``bak_`` file) and starts
    training through ``run_network``.
    """
    validate_embedding_tag = '1001'
    predicted_target = 'action'
    is_probability = predicted_target == 'action'
    de_config_path = "../environment_settings/ice_hockey_{0}_de.yaml".format(
        predicted_target)
    de_config = DEEmbedCongfig.load(de_config_path)
    local_test_flag = False
    # saved_network_dir, log_dir = get_model_and_log_name(config=icehockey_cvrnn_config)

    if local_test_flag:
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        saved_network_dir = None
    else:
        data_store_dir = de_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        # shuffle(dir_games_all)  # randomly shuffle the list
        # Floor division keeps the slice bounds ints on Python 2 and 3 alike.
        n_games = len(dir_games_all)
        training_dir_games_all = dir_games_all[0:n_games // 10 * 8]
        # Computed for reference only; this runner does not pass it on.
        validating_dir_games_all = dir_games_all[n_games // 10 * 8:n_games // 10 * 9]
        # testing_dir_games_all = dir_games_all[len(dir_games_all)/10*9:]
        testing_dir_games_all = dir_games_all[-n_games // 10:]  # TODO: testing
        saved_network_dir, log_dir = get_model_and_log_name(
            config=de_config,
            model_catagoery='de_embed',
            train_flag=False,
            embedding_tag=validate_embedding_tag)

    number_of_total_game = len(dir_games_all)
    de_config.Learn.number_of_total_game = number_of_total_game

    sess = tf.Session()
    try:
        model = DeterministicEmbedding(config=de_config,
                                       is_probability=is_probability)
        model.build(validate_embedding_tag)
        model()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            recorded_lists = (
                ('training', dir_games_all[0:len(dir_games_all) // 10 * 8]),
                ('testing', dir_games_all[len(dir_games_all) // 10 * 9:]),
            )
            # Keep the previous record of each list as a backup.
            for list_name, _ in recorded_lists:
                list_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name)
                if os.path.exists(list_path):
                    os.rename(list_path,
                              saved_network_dir + '/bak_{0}_file_dirs_all.csv'.format(list_name))
            for list_name, game_dirs in recorded_lists:
                # Text mode: the payload is str, which 'wb' rejects on Python 3.
                with open(saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name), 'w') as f:
                    for game_dir in game_dirs:
                        f.write(game_dir + '\n')

        run_network(sess=sess,
                    model=model,
                    config=de_config,
                    training_dir_games_all=training_dir_games_all,
                    testing_dir_games_all=testing_dir_games_all,
                    data_store=data_store_dir,
                    predicted_target=predicted_target,
                    save_network_dir=saved_network_dir,
                    validate_embedding_tag=validate_embedding_tag)
    finally:
        # Release the TensorFlow session even when training fails.
        sess.close()
def run():
    """Train the TPP-CVRNN model on the ice-hockey games.

    ``player_id_type`` selects the player-id cluster file and the config
    suffix. The game directories are split into training and testing lists,
    the split is recorded next to the saved network (a previous record is
    renamed to a ``bak_`` file) and training is started through
    ``run_network``.
    """
    local_test_flag = False
    player_id_type = 'local_id'

    # player id type -> (player id cluster file, config name suffix);
    # unknown types fall back to no cluster file and an empty suffix.
    player_id_settings = {
        'ap_cluster': ('../sport_resource/ice_hockey_201819/player_id_ap_cluster.json',
                       '_PlayerPositionClusterAP'),
        'km_cluster': ('../sport_resource/ice_hockey_201819/player_id_km_cluster.json',
                       '_PlayerPositionClusterKM'),
        'local_id': ('../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json',
                     '_PlayerLocalId'),
    }
    player_id_cluster_dir, predicted_target = player_id_settings.get(
        player_id_type, (None, ''))

    icehockey_tpp_cvrnn_config_path = "../environment_settings/icehockey_cvrnn{0}_config.yaml".format(
        predicted_target)
    icehockey_cvrnn_config = TPPCVRNNConfig.load(
        icehockey_tpp_cvrnn_config_path)
    saved_network_dir, log_dir = get_model_and_log_name(
        config=icehockey_cvrnn_config, model_catagoery='tpp_cvrnn')

    if local_test_flag:
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
    else:
        data_store_dir = icehockey_cvrnn_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        # shuffle(dir_games_all)  # randomly shuffle the list
        # Floor division keeps the slice bound an int on Python 2 and 3 alike.
        training_dir_games_all = dir_games_all[0:len(dir_games_all) // 10 * 8]
        # testing_dir_games_all = dir_games_all[len(dir_games_all)/10*9:]
        testing_dir_games_all = dir_games_all[-10:]  # TODO: testing

    number_of_total_game = len(dir_games_all)
    icehockey_cvrnn_config.Learn.number_of_total_game = number_of_total_game

    sess = tf.Session()
    try:
        tpp_cvrnn = TPPCVRNN(config=icehockey_cvrnn_config)
        tpp_cvrnn()
        sess.run(tf.global_variables_initializer())

        if not local_test_flag:
            if not os.path.exists(saved_network_dir):
                os.mkdir(saved_network_dir)
            recorded_lists = (
                ('training', dir_games_all[0:len(dir_games_all) // 10 * 8]),
                ('testing', dir_games_all[len(dir_games_all) // 10 * 9:]),
            )
            # Keep the previous record of each list as a backup.
            for list_name, _ in recorded_lists:
                list_path = saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name)
                if os.path.exists(list_path):
                    os.rename(list_path,
                              saved_network_dir + '/bak_{0}_file_dirs_all.csv'.format(list_name))
            for list_name, game_dirs in recorded_lists:
                # Text mode: the payload is str, which 'wb' rejects on Python 3.
                with open(saved_network_dir + '/{0}_file_dirs_all.csv'.format(list_name), 'w') as f:
                    for game_dir in game_dirs:
                        f.write(game_dir + '\n')

        run_network(sess=sess,
                    model=tpp_cvrnn,
                    config=icehockey_cvrnn_config,
                    log_dir=log_dir,
                    save_network_dir=saved_network_dir,
                    data_store=data_store_dir,
                    training_dir_games_all=training_dir_games_all,
                    testing_dir_games_all=testing_dir_games_all,
                    player_id_cluster_dir=player_id_cluster_dir)
    finally:
        # Release the TensorFlow session even when training fails.
        sess.close()
sess_nn = tf.InteractiveSession() model_nn = validate_model_initialization(sess_nn=sess_nn, model_category=model_category, config=icehockey_model_config) acc_all = [] ll_all = [] # running_numbers = [0,1,2,3,4] running_numbers = [0] with open('./results/player_id_acc_' + model_category + '_' + str(model_number) + player_info + msg_bounding+sparse_msg+'_q_cv', 'wb') as file_writer: for running_number in running_numbers: saved_network_dir, log_dir = get_model_and_log_name(config=icehockey_model_config, model_catagoery=model_category, running_number=running_number) testing_dir_games_all = [] # with open('../../sport_resource/ice_hockey_201819/testing_file_dirs_all.csv', 'rb') as f: with open(saved_network_dir + '/testing_file_dirs_all.csv', 'rb') as f: testing_dir_all = f.readlines() for testing_dir in testing_dir_all: testing_dir_games_all.append(str(int(testing_dir))) model_data_store_dir = icehockey_model_config.Learn.save_mother_dir + "/oschulte/Galen/Ice-hockey-data/2018-2019/" source_data_store = '/Local-Scratch/oschulte/Galen/2018-2019/' # data_name = get_data_name(icehockey_model_config, model_category, model_number) print(model_category + '_' + str(model_number) + player_info)
def run_calibration():
    """Run the score-difference calibration for one hard-coded model setup.

    Loads the YAML configuration that matches ``model_type``, reads the list
    of testing games from ``testing_file_dirs_all.csv``, builds a
    ``Calibration`` object over the configured feature bins and then calls,
    in order, ``construct_bin_dicts``, ``aggregate_calibration_values`` and
    ``compute_distance``. Finally the calibration output directory is printed.

    Raises:
        ValueError: if ``model_type`` is neither ``'cvrnn'`` nor ``'lstm_diff'``.
    """
    model_type = 'cvrnn'
    player_info = '_box'
    apply_old = False
    apply_difference = True

    if model_type == 'cvrnn':
        model_number = 2101
        predicted_target = '_PlayerLocalId'
        icehockey_config_path = "../../environment_settings/icehockey_cvrnn{0}_config{1}.yaml".format(
            predicted_target, player_info)
        config = CVRNNCongfig.load(icehockey_config_path)
    elif model_type == 'lstm_diff':
        model_number = 2101
        icehockey_config_path = "../../environment_settings/ice_hockey_predict_score_diff_lstm{0}.yaml".format(
            player_info)
        config = LSTMQsCongfig.load(icehockey_config_path)
    else:
        raise ValueError('incorrect model type {0}'.format(model_type))

    calibration_features = ['period', 'scoreDifferential', 'zone', 'manpowerSituation', 'home_away']
    # NOTE(review): the original wrote the feature names as ('period') etc.;
    # that is a parenthesised string, not a 1-tuple, so the values below are
    # unchanged. Confirm whether Calibration expects a string or a tuple.
    calibration_bins = {
        'period': {'feature_name': 'period', 'range': (1, 2, 3, 4)},
        'scoreDifferential': {'feature_name': 'scoreDifferential', 'range': range(-10, 10)},
        'zone': {'feature_name': 'zone', 'range': ('dz', 'nz', 'oz')},
        'manpowerSituation': {'feature_name': 'manpowerSituation',
                              'range': ('shortHanded', 'evenStrength', 'powerPlay')},
        'home_away': {'feature_name': 'home_away', 'range': (1, 0)},
        # TODO: we must add the home/away label
    }

    source_data_dir = '/Local-Scratch/oschulte/Galen/2018-2019/'
    model_data_store_dir = '/Local-Scratch/oschulte/Galen/Ice-hockey-data/2018-2019'

    # The returned directories are only needed by the alternative (disabled)
    # game-list location `saved_network_dir + '/testing_file_dirs_all.csv'`;
    # the call is kept so behaviour stays the same.
    saved_network_dir, log_dir = get_model_and_log_name(config=config, model_catagoery=model_type)

    with open('../../sport_resource/ice_hockey_201819/' + '/testing_file_dirs_all.csv', 'rb') as f:
        testing_dir_all = f.readlines()
    # Each line holds an integer game id; int() strips the newline. Blank
    # lines (e.g. a trailing empty line) are skipped instead of crashing.
    testing_dir_games_all = [
        str(int(game_line)) + '-playsequence-wpoi.json'
        for game_line in testing_dir_all
        if game_line.strip()
    ]

    cali = Calibration(bins=calibration_bins,
                       source_data_dir=source_data_dir,
                       calibration_features=calibration_features,
                       config=config,
                       model_data_store_dir=model_data_store_dir,
                       apply_old=apply_old,
                       apply_difference=apply_difference,
                       model_type=model_type,
                       model_number=model_number,
                       player_info=player_info,
                       calibration_type='score_diff',
                       testing_dir_all=testing_dir_games_all,
                       focus_actions_list=[])
    cali.construct_bin_dicts()
    cali.aggregate_calibration_values()
    cali.compute_distance()
    # Call form prints the same text on Python 2 and also parses on Python 3.
    print(cali.save_calibration_dir)
def _backup_game_list(network_dir, stem):
    """Rename ``<network_dir>/<stem>.csv`` to a dated ``bak_`` file if it exists."""
    current_path = network_dir + '/' + stem + '.csv'
    if os.path.exists(current_path):
        date_tag = datetime.date.today().strftime("%Y%B%d")
        os.rename(current_path, network_dir + '/bak_' + stem + '_{0}.csv'.format(date_tag))


def _write_game_list(path, game_dirs):
    """Write every entry of *game_dirs* to *path*, one per line."""
    # NOTE(review): binary mode combined with str payloads is Python 2 only;
    # kept unchanged so the files written stay byte-identical.
    with open(path, 'wb') as list_file:
        for game_dir in game_dirs:
            list_file.write(game_dir + '\n')


def run():
    """Train or evaluate the encoder model with the settings hard-coded below.

    Steps:
      1. Pick the player-id resource file and config suffix from
         ``player_id_type`` and load the encoder YAML config.
      2. List the game directories and split them into training, validation
         and testing sets according to ``running_number``.
      3. Build the network in a fresh TF session.
      4. If ``training`` is set: (unless ``local_test_flag``) back up and
         rewrite the three ``*_file_dirs_all.csv`` lists in the checkpoint
         directory, then call ``run_network``. Otherwise restore checkpoint
         ``model_number`` and call ``validate_model`` on the last tenth of
         the games.

    The session is closed on every exit path, including errors.
    """
    training = True
    running_number = 4
    local_test_flag = False
    player_id_type = 'local_id'
    rnn_type = '_lstm'
    predict_action = '_predict_next_goal'
    player_info = ''

    if player_id_type == 'ap_cluster':
        player_id_cluster_dir = '../sport_resource/ice_hockey_201819/player_id_ap_cluster.json'
        predicted_target = '_PlayerPositionClusterAP'  # playerId_
    elif player_id_type == 'km_cluster':
        player_id_cluster_dir = '../sport_resource/ice_hockey_201819/player_id_km_cluster.json'
        predicted_target = '_PlayerPositionClusterKM'  # playerId_
    elif player_id_type == 'local_id':
        player_id_cluster_dir = '../sport_resource/ice_hockey_201819/local_player_id_2018_2019.json'
        predicted_target = '_PlayerLocalId'  # playerId_
    else:
        player_id_cluster_dir = None
        predicted_target = ''

    icehockey_encoder_config_path = \
        "../environment_settings/icehockey_stats{1}_encoder{0}{2}_config{3}.yaml".format(
            predicted_target, rnn_type, predict_action, player_info)
    icehockey_encoder_config = EncoderConfig.load(icehockey_encoder_config_path)
    Prediction_MemoryBuffer.set_cache_memory(cache_number=icehockey_encoder_config.Arch.Predict.output_node)

    saved_network_dir, log_dir = get_model_and_log_name(config=icehockey_encoder_config,
                                                        model_catagoery='encoder',
                                                        running_number=running_number)

    if local_test_flag:
        # Local smoke test: every split is simply the whole sample directory.
        data_store_dir = "/Users/liu/Desktop/Ice-hokcey-data-sample/feature-sample"
        dir_games_all = os.listdir(data_store_dir)
        training_dir_games_all = os.listdir(data_store_dir)
        testing_dir_games_all = os.listdir(data_store_dir)
        validate_dir_games_all = os.listdir(data_store_dir)
        tmp_testing_dir_games_all = os.listdir(data_store_dir)
        source_data_dir = '/Users/liu/Desktop/Ice-hokcey-data-sample/data-sample/'
    else:
        source_data_dir = '/Local-Scratch/oschulte/Galen/2018-2019/'
        data_store_dir = icehockey_encoder_config.Learn.save_mother_dir + \
            "/oschulte/Galen/Ice-hockey-data/2018-2019/"
        dir_games_all = os.listdir(data_store_dir)
        game_count = len(dir_games_all)

        # Explicit floor division keeps the slice bounds integral regardless of
        # the interpreter's `/` semantics; the results match Python 2's `/`.
        # NOTE(review): when the game count is not a multiple of 5 the two
        # expressions round differently and the head and tail slices can
        # overlap. Left as is, because changing it would alter the splits
        # recorded for existing runs -- confirm before fixing.
        head_end = game_count // 5 * 4 - running_number * game_count // 5
        training_dir_games_all = dir_games_all[0:head_end]
        if running_number != 0:
            # A slice starting at -0 would select the whole list, hence no
            # tail part for run 0.
            training_dir_games_all = training_dir_games_all \
                + dir_games_all[-running_number * game_count // 5:]

        # Everything not used for training is split in half: test / validate.
        training_lookup = set(training_dir_games_all)
        test_validate_dir_games_all = [item for item in dir_games_all if item not in training_lookup]
        half = len(test_validate_dir_games_all) // 2
        testing_dir_games_all = test_validate_dir_games_all[:half]
        validate_dir_games_all = test_validate_dir_games_all[half:]
        tmp_testing_dir_games_all = testing_dir_games_all[-10:]  # TODO: it is a small running testing, not the real one

    icehockey_encoder_config.Learn.number_of_total_game = len(dir_games_all)

    sess = tf.Session()
    try:
        encoder = Encoder_NN(config=icehockey_encoder_config)
        encoder()
        sess.run(tf.global_variables_initializer())

        if training:
            if not local_test_flag:
                if not os.path.exists(saved_network_dir):
                    os.mkdir(saved_network_dir)
                # Keep the previous game lists as dated backups, then save the
                # lists used by this run.
                for stem in ('training_file_dirs_all', 'testing_file_dirs_all', 'validate_file_dirs_all'):
                    _backup_game_list(saved_network_dir, stem)
                _write_game_list(saved_network_dir + '/training_file_dirs_all.csv', training_dir_games_all)
                _write_game_list(saved_network_dir + '/validate_file_dirs_all.csv', validate_dir_games_all)
                _write_game_list(saved_network_dir + '/testing_file_dirs_all.csv', testing_dir_games_all)
            print('training the model.')
            run_network(sess=sess, model=encoder, config=icehockey_encoder_config, log_dir=log_dir,
                        save_network_dir=saved_network_dir, data_store=data_store_dir,
                        source_data_dir=source_data_dir,
                        training_dir_games_all=training_dir_games_all,
                        testing_dir_games_all=tmp_testing_dir_games_all,
                        player_id_cluster_dir=player_id_cluster_dir)
        else:
            print('testing the model')
            model_number = 9301
            testing_dir_games_all = dir_games_all[len(dir_games_all) // 10 * 9:]
            saver = tf.train.Saver()
            # save_network_dir + '/' + config.Learn.data_name + '-game-'
            model_path = saved_network_dir + '/' + icehockey_encoder_config.Learn.data_name \
                + '-game--{0}'.format(str(model_number))
            saver.restore(sess, model_path)
            print('successfully load data from' + model_path)
            validate_model(testing_dir_games_all,
                           data_store=data_store_dir,
                           source_data_dir=source_data_dir,
                           config=icehockey_encoder_config,
                           sess=sess,
                           model=encoder,
                           player_id_cluster_dir=player_id_cluster_dir,
                           train_game_number=None,
                           validate_cvrnn_flag=True,
                           validate_td_flag=True,
                           validate_diff_flag=True,
                           validate_predict_flag=True)
    finally:
        # Release the session even when training/validation raises.
        sess.close()