def getBatchBalanced(data, batch_size, hero_feature_indicies, classification_labels, get_death_times=False):
    """Sample a class-balanced batch: ~10/11 rows where someone dies, ~1/11 where no one does.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw (not yet postprocessed) match data; must contain the
        "label_who_dies_next_10" column (the "nobody dies" class).
    batch_size : int
        Total number of rows in the returned batch.
    hero_feature_indicies : sequence of 10 index lists
        Column indicies of each hero's features (as produced by getFeatureIndicies).
    classification_labels : index list
        Column indicies of the classification label columns.
    get_death_times : bool
        If True, also return the "time_until_next_death" columns of the batch.

    Returns
    -------
    (hero_features, classification_label_values[, death_times])
        hero_features is a list of 10 float32 arrays, one per hero.
    """
    # "label_who_dies_next_10" > 0.5 marks rows where NO hero dies next.
    no_one_dies_mask = data["label_who_dies_next_10"].values > 0.5
    num_sample_from_die = int(batch_size * 10.0 / 11)
    num_sample_from_not_die = batch_size - num_sample_from_die

    # Fix: was `>`, which forced sampling WITH replacement even when the pool
    # size exactly equals the requested count; `>=` only falls back to
    # replacement when the pool is genuinely too small.
    have_enough_unique_data = sum(~no_one_dies_mask) >= num_sample_from_die
    data_batch_die = data[~no_one_dies_mask].sample(
        n=num_sample_from_die, replace=not have_enough_unique_data)

    have_enough_unique_data = sum(no_one_dies_mask) >= num_sample_from_not_die
    data_batch_not_die = data[no_one_dies_mask].sample(
        n=num_sample_from_not_die, replace=not have_enough_unique_data)

    data_batch = pd.concat([data_batch_die, data_batch_not_die])
    # Postprocess only the sampled batch — doing the whole dataset up front
    # would take up too much memory.
    data_batch = postprocess_data(data_batch)

    hero_features = [data_batch.values[:, hero_feature_indicies[i]].astype(np.float32)
                     for i in range(10)]
    classification_label_values = data_batch.values[:, classification_labels].astype(np.float32)

    if get_death_times:
        labels = [(i, label) for i, label in enumerate(list(data))]
        death_time_indicies = preprocess.labels_to_indicies(
            preprocess.select_features_by_name("time_until_next_death", labels))
        death_times = data_batch.values[:, death_time_indicies]
        return hero_features, classification_label_values, death_times
    return hero_features, classification_label_values
def getBalancedBatchForPlayer(data, player_i, batch_size, hero_feature_indicies, classification_labels, get_death_times=False):
    """Sample a batch balanced for one player: half the rows where that player dies, half where not.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw (not yet postprocessed) match data.
    player_i : int
        Index (0-9) of the player to balance on.
    batch_size : int
        Total number of rows in the returned batch.
    hero_feature_indicies : sequence of 10 index lists
        Column indicies of each hero's features.
    classification_labels : index list
        Column indicies of labels like "player_0_die_in_10";
        classification_labels[player_i] selects this player's label column.
    get_death_times : bool
        If True, also return the "time_until_next_death" columns of the batch.

    Returns
    -------
    (hero_features, classification_label_values[, death_times])
    """
    # classification_labels contains the indicies of labels like "player_0_die_in_10",
    # so indexing by player_i gives this player's own death label column.
    player_dies_mask = data.values[:, classification_labels[player_i]] > 0.5
    num_sample_from_die = int(batch_size / 2)
    num_sample_from_not_die = batch_size - num_sample_from_die

    # Fix: was `>`, which forced sampling WITH replacement even when the pool
    # size exactly equals the requested count; `>=` only falls back to
    # replacement when the pool is genuinely too small.
    have_enough_unique_data = sum(player_dies_mask) >= num_sample_from_die
    data_batch_die = data[player_dies_mask].sample(
        n=num_sample_from_die, replace=not have_enough_unique_data)

    have_enough_unique_data = sum(~player_dies_mask) >= num_sample_from_not_die
    data_batch_not_die = data[~player_dies_mask].sample(
        n=num_sample_from_not_die, replace=not have_enough_unique_data)

    data_batch = pd.concat([data_batch_die, data_batch_not_die])
    # Postprocess only the sampled batch — doing the whole dataset up front
    # would take up too much memory.
    data_batch = postprocess_data(data_batch)

    hero_features = [data_batch.values[:, hero_feature_indicies[i]].astype(np.float32)
                     for i in range(10)]
    classification_label_values = data_batch.values[:, classification_labels].astype(np.float32)

    if get_death_times:
        labels = [(i, label) for i, label in enumerate(list(data))]
        death_time_indicies = preprocess.labels_to_indicies(
            preprocess.select_features_by_name("time_until_next_death", labels))
        death_times = data_batch.values[:, death_time_indicies]
        return hero_features, classification_label_values, death_times
    return hero_features, classification_label_values
def getLabelIndicies_die_in_20(data):
    """Return the column indicies of the "die_in_20" label columns.

    Postprocesses a single sampled row first, because postprocessing is what
    produces the final column layout the indicies must refer to.
    """
    sampled_row = data.sample(n=1, replace=False)
    sampled_row = postprocess_data(sampled_row)
    indexed_columns = list(enumerate(list(sampled_row)))
    matching = preprocess.select_features_by_name("die_in_20", indexed_columns)
    return preprocess.labels_to_indicies(matching)
def getFeatureIndicies(data, exclude_if_contains_list=None, only_include_list=None):
    """Compute per-hero feature column indicies from an example postprocessed row.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw data; one row is sampled and postprocessed to discover the
        final column layout.
    exclude_if_contains_list : list of str or None
        Drop any column whose name contains one of these substrings.
    only_include_list : list of str or None
        Keep only columns whose name contains one of these substrings
        (a column matching several substrings is kept once per match,
        mirroring the original behavior).

    Returns
    -------
    list of 10 index lists, one per hero; each list also contains column 0
    (the time column).
    """
    sample = data.sample(n=1, replace=False)
    sample = postprocess_data(sample)
    labels = list(enumerate(list(sample)))

    if only_include_list is not None:
        # NOTE: one entry is appended per matching substring, so a column
        # matching multiple patterns appears multiple times (kept as-is).
        labels = [(idx, name)
                  for idx, name in labels
                  for wanted in only_include_list
                  if wanted in name]

    if exclude_if_contains_list is not None:
        labels = [(idx, name)
                  for idx, name in labels
                  if not any(banned in name for banned in exclude_if_contains_list)]

    hero_feature_indicies = []
    for hero_i in range(10):
        hero_labels = preprocess.select_features_of_hero(hero_i, labels)
        hero_indicies = preprocess.labels_to_indicies(hero_labels)
        hero_indicies.append(0)  # dont forget the time (column 0)
        hero_feature_indicies.append(hero_indicies)
    return hero_feature_indicies
all_pred = [[] for model_path in modelPathList] per_sec_pred = [[[] for _ in range(20)] for model_path in modelPathList] for i in range(match_per_worker): match_index = first_match_index_for_this_task + i if match_index >= num_matches: continue print("Loading match ", match_index) data = data_loader.load_data_from_file(dataPathList[match_index]) # get death times labels = [(i, label) for i, label in enumerate(list(data))] death_time_indicies = preprocess.labels_to_indicies( preprocess.select_features_by_name("time_until_next_death", labels)) death_times = data.values[:, death_time_indicies].astype(np.float32) for model_i, (model_path, config_path) in enumerate(zip(modelPathList, configPathList)): with open(config_path) as f: config = commentjson.load(f) modeldata = test_model.load_pytorch_model(model_path, config, data) with torch.no_grad(): y = modeldata.fullGameLabels X = [torch.from_numpy(hero_X) for hero_X in modeldata.fullGameData] pred = modeldata.model(X)