Ejemplo n.º 1
0
def getBatchBalanced(data,batch_size,hero_feature_indicies,classification_labels,get_death_times=False):
    """Sample a batch that is weighted towards rows where somebody dies.

    Rows whose ``label_who_dies_next_10`` value is above 0.5 are treated as
    "no one dies"; every other row is treated as "someone dies".
    ``int(batch_size * 10 / 11)`` rows are drawn from the "someone dies"
    group and the rest of the batch from the "no one dies" group. A group is
    sampled with replacement only when it holds fewer rows than requested.

    Args:
        data: pandas DataFrame containing a ``label_who_dies_next_10`` column.
        batch_size: total number of rows in the returned batch.
        hero_feature_indicies: indexable with ``0..9``; entry ``i`` selects
            the columns of the post-processed batch that belong to hero ``i``.
        classification_labels: column index/indices of the classification
            labels in the post-processed batch.
        get_death_times: when truthy, also return the
            ``time_until_next_death`` columns of the batch.

    Returns:
        ``(hero_features, classification_label_values)`` or, when
        ``get_death_times`` is set,
        ``(hero_features, classification_label_values, death_times)``.
        ``hero_features`` is a list of ten float32 arrays.

    Raises:
        ValueError: raised by pandas when a group is empty although samples
            are requested from it.
    """
    no_one_dies_mask = data["label_who_dies_next_10"].values > 0.5

    num_sample_from_die = int(batch_size * 10.0/11)
    num_sample_from_not_die = batch_size - num_sample_from_die

    die_rows = data[~no_one_dies_mask]
    not_die_rows = data[no_one_dies_mask]

    # Replacement is needed only when a group is strictly too small; a group
    # with exactly the requested number of rows is sampled without duplicates.
    data_batch_die = die_rows.sample(
        n=num_sample_from_die,
        replace=len(die_rows) < num_sample_from_die)
    data_batch_not_die = not_die_rows.sample(
        n=num_sample_from_not_die,
        replace=len(not_die_rows) < num_sample_from_not_die)

    data_batch = pd.concat([data_batch_die,data_batch_not_die])

    # Post-processing is applied to the batch only; doing it on the whole
    # dataset would take up too much memory.
    data_batch = postprocess_data(data_batch)

    # Build the NumPy view of the batch once instead of once per lookup.
    batch_values = data_batch.values

    hero_features = [
        batch_values[:,hero_feature_indicies[i]].astype(np.float32)
        for i in range(10)
    ]

    classification_label_values = batch_values[:,classification_labels].astype(np.float32)

    if get_death_times:
        # NOTE(review): the indices are looked up on the columns of the raw
        # `data` but applied to the post-processed batch - assumes
        # postprocess_data keeps these column positions; TODO confirm.
        labels = list(enumerate(list(data)))
        death_time_indicies = preprocess.labels_to_indicies(preprocess.select_features_by_name("time_until_next_death",labels))
        death_times = batch_values[:,death_time_indicies]
        return hero_features,classification_label_values,death_times

    return hero_features,classification_label_values
Ejemplo n.º 2
0
def getBalancedBatchForPlayer(data,player_i,batch_size,hero_feature_indicies,classification_labels,get_death_times=False):
    """Sample a batch in which the selected player dies in about half the rows.

    A row counts as "player dies" when the column at position
    ``classification_labels[player_i]`` of ``data`` is above 0.5.
    ``int(batch_size / 2)`` rows are drawn from that group and the rest of
    the batch from the remaining rows. A group is sampled with replacement
    only when it holds fewer rows than requested.

    Args:
        data: pandas DataFrame holding the samples.
        player_i: position of the player inside ``classification_labels``.
        batch_size: total number of rows in the returned batch.
        hero_feature_indicies: indexable with ``0..9``; entry ``i`` selects
            the columns of the post-processed batch that belong to hero ``i``.
        classification_labels: column indices of the per-player
            classification labels (labels like "player_0_die_in_10"
            according to the original comment).
        get_death_times: when truthy, also return the
            ``time_until_next_death`` columns of the batch.

    Returns:
        ``(hero_features, classification_label_values)`` or, when
        ``get_death_times`` is set,
        ``(hero_features, classification_label_values, death_times)``.
        ``hero_features`` is a list of ten float32 arrays.

    Raises:
        ValueError: raised by pandas when a group is empty although samples
            are requested from it.
    """
    # Read just the selected player's label column instead of converting the
    # whole dataset to a NumPy array for a single column lookup.
    # NOTE(review): classification_labels indexes the raw `data` here and the
    # post-processed batch below - assumes both share these column
    # positions; TODO confirm.
    player_dies_mask = data.iloc[:,classification_labels[player_i]].values > 0.5

    num_sample_from_die = int(batch_size/2)
    num_sample_from_not_die = batch_size - num_sample_from_die

    die_rows = data[player_dies_mask]
    not_die_rows = data[~player_dies_mask]

    # Replacement is needed only when a group is strictly too small; a group
    # with exactly the requested number of rows is sampled without duplicates.
    data_batch_die = die_rows.sample(
        n=num_sample_from_die,
        replace=len(die_rows) < num_sample_from_die)
    data_batch_not_die = not_die_rows.sample(
        n=num_sample_from_not_die,
        replace=len(not_die_rows) < num_sample_from_not_die)

    data_batch = pd.concat([data_batch_die,data_batch_not_die])

    # Post-processing is applied to the batch only; doing it on the whole
    # dataset would take up too much memory.
    data_batch = postprocess_data(data_batch)

    # Build the NumPy view of the batch once instead of once per lookup.
    batch_values = data_batch.values

    hero_features = [
        batch_values[:,hero_feature_indicies[i]].astype(np.float32)
        for i in range(10)
    ]

    classification_label_values = batch_values[:,classification_labels].astype(np.float32)

    if get_death_times:
        # NOTE(review): the indices are looked up on the columns of the raw
        # `data` but applied to the post-processed batch - assumes
        # postprocess_data keeps these column positions; TODO confirm.
        labels = list(enumerate(list(data)))
        death_time_indicies = preprocess.labels_to_indicies(preprocess.select_features_by_name("time_until_next_death",labels))
        death_times = batch_values[:,death_time_indicies]
        return hero_features,classification_label_values,death_times

    return hero_features,classification_label_values
def getLabelIndicies_die_in_20(data):
    """Return the column indices of the "die_in_20" labels.

    One randomly chosen row of ``data`` is passed through
    ``postprocess_data`` (presumably so the column layout matches that of
    post-processed batches). Its columns are enumerated, filtered by name
    and converted to indices with the ``preprocess`` helpers.
    """
    sample_row = postprocess_data(data.sample(n=1, replace=False))
    indexed_columns = list(enumerate(sample_row))
    die_in_20_columns = preprocess.select_features_by_name(
        "die_in_20", indexed_columns)
    return preprocess.labels_to_indicies(die_in_20_columns)
Ejemplo n.º 4
0
def getFeatureIndicies(data,exclude_if_contains_list = None,only_include_list = None):
    """Return, for each of the ten heroes, the column indices of its features.

    One randomly chosen row of ``data`` is post-processed to obtain the
    column labels. The labels are optionally filtered by substring and then
    split per hero with ``preprocess.select_features_of_hero``.

    Args:
        data: pandas DataFrame holding the samples.
        exclude_if_contains_list: optional iterable of substrings; a column
            is dropped when its label contains any of them.
        only_include_list: optional iterable of substrings; when given, a
            column is kept only if its label contains at least one of them.
            A column is kept once even when several substrings match.

    Returns:
        A list with ten entries, one per hero. Each entry is the list of
        column indices for that hero with index 0 appended (the time column,
        according to the original comment).
    """
    # Only the column layout is needed, so a single post-processed row is enough.
    example_row = postprocess_data(data.sample(n=1,replace=False))

    labels = list(enumerate(example_row))

    if only_include_list is not None:
        # any() keeps each column at most once; appending per matching
        # pattern would duplicate the column index in the result.
        labels = [
            (i,label) for i,label in labels
            if any(include_label in label for include_label in only_include_list)
        ]

    if exclude_if_contains_list is not None:
        labels = [
            (i,label) for i,label in labels
            if not any(exclude_pattern in label for exclude_pattern in exclude_if_contains_list)
        ]

    hero_feature_indicies = []
    for hero_i in range(10):
        hero_labels = preprocess.select_features_of_hero(hero_i,labels)
        hero_indicies = preprocess.labels_to_indicies(hero_labels)
        hero_indicies.append(0) # dont forget the time
        hero_feature_indicies.append(hero_indicies)

    return hero_feature_indicies
# One prediction list per model.
all_pred = [[] for _ in modelPathList]

# For every model, 20 separate prediction lists.
per_sec_pred = [[[] for _ in range(20)] for _ in modelPathList]

for i in range(match_per_worker):
    match_index = first_match_index_for_this_task + i
    if match_index >= num_matches:
        continue

    print("Loading match ", match_index)

    data = data_loader.load_data_from_file(dataPathList[match_index])

    # get death times
    labels = [(i, label) for i, label in enumerate(list(data))]
    death_time_indicies = preprocess.labels_to_indicies(
        preprocess.select_features_by_name("time_until_next_death", labels))
    death_times = data.values[:, death_time_indicies].astype(np.float32)

    for model_i, (model_path,
                  config_path) in enumerate(zip(modelPathList,
                                                configPathList)):

        with open(config_path) as f:
            config = commentjson.load(f)

        modeldata = test_model.load_pytorch_model(model_path, config, data)

        with torch.no_grad():
            y = modeldata.fullGameLabels
            X = [torch.from_numpy(hero_X) for hero_X in modeldata.fullGameData]
            pred = modeldata.model(X)