Example #1
0
def prepare_test_data(onehot_mapping):
    """Build leave-one-out evaluation matrices from the test teams file.

    Every line of the test file is split on commas and its last field is
    dropped. For each remaining member, one row is produced: the other
    members are encoded as a normalised multi-hot vector and the held-out
    member becomes the label.

    Args:
        onehot_mapping: Mapping from member name to column index.

    Returns:
        A ``(data_mat, label_mat)`` pair of ``float16`` arrays. ``data_mat``
        has one column per mapping entry; ``label_mat`` holds the column
        index of the held-out member, or ``len(onehot_mapping)`` when that
        member is not in the mapping.
    """
    test_teams_file = join(config.TEAMS_TXT_DIR, TestTeamConfig().out_file)
    with open(test_teams_file) as in_file:
        teams = [line.strip() for line in in_file]

    num_pokes = len(onehot_mapping)
    # Read the configuration once instead of once per vector element.
    max_teammates = GenerateTeamsConfig().teamLength - 1

    valid_teams = []
    for team_str in teams:
        # The trailing comma-separated field is not a team member.
        team = team_str.split(",")[:-1]

        for poke_idx, held_out in enumerate(team):
            teammates = team[:poke_idx] + team[(poke_idx + 1):]

            line_encode = npy.zeros(num_pokes)
            num_invalid_pokes = 0
            for poke in teammates:
                if poke in onehot_mapping:
                    line_encode[onehot_mapping[poke]] = 1
                else:
                    num_invalid_pokes += 1

            # Skip rows with no usable teammates; this also keeps the
            # divisor below strictly positive.
            num_known = max_teammates - num_invalid_pokes
            if num_known <= 0:
                continue

            # Unknown held-out members get the out-of-range index num_pokes.
            label = onehot_mapping.get(held_out, num_pokes)
            valid_teams.append(
                npy.concatenate([line_encode / num_known, [label]]))

    super_mat = npy.zeros((len(valid_teams), num_pokes + 1), dtype=npy.float16)
    for row_idx, row in enumerate(valid_teams):
        super_mat[row_idx, :] = row

    data_mat = super_mat[:, 0:num_pokes]
    label_mat = super_mat[:, num_pokes]

    return data_mat, label_mat
Example #2
0
def beam_step(base_teams, names):
    """Extend every base team by one name and keep the most probable results.

    Candidates rejected by ``team_validity_check`` are dropped. The rest are
    de-duplicated irrespective of member order, scored with
    ``calculate_team_prob`` and cut down to ``beamSearchThreshold`` entries.

    NOTE(review): ``m_prob`` and ``c_prob`` are not parameters here; they are
    looked up in the enclosing module scope when the function runs.

    Returns:
        A tuple ``(teams, total_prob)``: the retained teams (each a list of
        names, highest probability first) and the sum of their probabilities.
    """
    unique_keys = set()
    for base in base_teams:
        for extra in names:
            candidate = base + [extra]

            # Invalid combinations never reach the de-duplication set.
            if team_validity_check(candidate) is False:
                continue

            # A sorted, "|"-joined key makes member order irrelevant.
            unique_keys.add("|".join(sorted(candidate)))

    scored = []
    for key in unique_keys:
        members = key.split("|")
        scored.append((calculate_team_prob(members, m_prob, c_prob), members))
    scored.sort(key=lambda pair: pair[0], reverse=True)

    kept = scored[:GenerateTeamsConfig().beamSearchThreshold]

    kept_teams = [members for _, members in kept]
    return kept_teams, sum(prob for prob, _ in kept)
Example #3
0
def get_filenames():
    """Return one ``(output_path, unique_path)`` tuple per target-file suffix.

    File names are ``<prefix>_<suffix>``, with the prefixes taken from
    ``GenerateTeamsConfig`` and the suffixes from ``ModelConfig``; both paths
    live under ``config.TEAMS_TXT_DIR``.
    """
    gt_conf = GenerateTeamsConfig()
    m_conf = ModelConfig()

    def txt_path(prefix, suffix):
        # Build "<prefix>_<suffix>" inside the teams text directory.
        return join(config.TEAMS_TXT_DIR, "{}_{}".format(prefix, suffix))

    return [(txt_path(gt_conf.outputFilenamePrefix, suffix),
             txt_path(gt_conf.uniqueFilenamePrefix, suffix))
            for suffix in m_conf.targetFilesSuffix]
Example #4
0
def generate_teams_beam(info, m_prob, c_prob):
    """Grow teams one member at a time with ``beam_step``.

    Starts from one single-member team per key of ``m_prob`` and calls
    ``beam_step`` until the step counter reaches the configured
    ``teamLength``, printing timing information after every step.

    ``info`` and ``c_prob`` are accepted but not used directly in this body.

    NOTE(review): if ``teamLength`` is 1 or less the loop body never runs and
    ``team_probs`` is unbound at the ``return``.

    Returns:
        The ``(teams, total_prob)`` pair produced by the last ``beam_step``.
    """
    names = m_prob.keys()
    base_teams = [[name] for name in names]

    began_at = time()
    step_no = 1
    while step_no < GenerateTeamsConfig().teamLength:
        step_began_at = time()
        base_teams, team_probs = beam_step(base_teams, names)

        # Reclaim the previous step's candidates before reporting.
        gc.collect()

        step_elapsed = time() - step_began_at
        total_elapsed = time() - began_at
        progress = "Beam Step: {} ({}) | Time Elapsed {:.02f}s ({:.02f}s)"
        print(progress.format(step_no, len(base_teams), step_elapsed,
                              total_elapsed))
        step_no += 1

    print("Time Spent Generating Teams: {:.02f}s".format(time() - began_at))
    return base_teams, team_probs
Example #5
0
def _select_team_candidates(gt_conf, info, m_prob, c_prob):
    """Return ``(candidates, num_to_search, total_prob)`` for the configured method."""
    method = gt_conf.method
    team_length = gt_conf.teamLength
    names = m_prob.keys()

    if method == "comb":
        candidates = combinations(names, team_length)
        num_names = len(names)
        # Integer division: the binomial coefficient is exact, and a float
        # would lose precision for large search spaces.
        num_to_search = factorial(num_names) // (
            factorial(team_length) * factorial(num_names - team_length))
        return candidates, num_to_search, 1

    if method == "beam":
        candidates, total_prob = generate_teams_beam(info, m_prob, c_prob)
        return candidates, len(candidates), total_prob

    raise RuntimeError(
        "Invalid Method (either 'comb' or 'beam' expected): {}".format(method))


def generate_teams(info, m_prob, c_prob, out_filename):
    """Sample appearance counts for candidate teams and write them to a file.

    Candidate teams come either from every combination of the keys of
    ``m_prob`` (method ``"comb"``) or from ``generate_teams_beam`` (method
    ``"beam"``). Each valid candidate is scored with ``calculate_team_prob``,
    divided by the method's total probability, and passed to ``sample_team``.
    Teams with a positive appearance count are written as one comma-separated
    line: the member names followed by the count.

    Args:
        info: Mapping providing ``"ability_total"``, rounded to get the total
            number of teams to distribute.
        m_prob: Mapping whose keys are the candidate names; forwarded to the
            probability helpers.
        c_prob: Forwarded to the probability helpers.
        out_filename: Path of the output file (created or truncated).

    Raises:
        RuntimeError: If the configured method is neither ``"comb"`` nor
            ``"beam"``.
    """
    # Read the configuration once rather than on every loop iteration.
    gt_conf = GenerateTeamsConfig()
    checkpoint_every = gt_conf.checkpointIteration
    buffer_size = gt_conf.bufferSize

    total_teams = round(info["ability_total"])

    # The context manager closes the file even if candidate selection or
    # sampling raises.
    with open(out_filename, "w") as main_outfile:
        team_combinations, num_to_search, total_prob = \
            _select_team_candidates(gt_conf, info, m_prob, c_prob)

        print(
            "Number of Teams to Search: {num} | Total Teams: {tot} | Total Prob: {prob}"
            .format(num=num_to_search, tot=total_teams, prob=total_prob))

        counter = 0
        buffer = []
        pct_chosen = 0
        num_chosen = 0

        def report_progress():
            print("Iteration: {}".format(counter),
                  "| Total Probability Captured: {}".format(pct_chosen),
                  "| Num Teams Generated: {}".format(num_chosen))

        for counter, team in enumerate(team_combinations, start=1):
            team = list(team)

            if team_validity_check(team) is False:
                continue

            team_prob = calculate_team_prob(team, m_prob, c_prob) / total_prob

            team_appearances = sample_team(total_teams - num_chosen, team_prob,
                                           pct_chosen)

            if team_appearances > 0:
                num_chosen += team_appearances
                pct_chosen += team_prob
                buffer.append(team + ["{}\n".format(str(team_appearances))])

            if counter % checkpoint_every == 0:
                report_progress()

            # Flush only a full buffer; the old modulo test also fired on an
            # empty one.
            if len(buffer) >= buffer_size:
                main_outfile.writelines([",".join(x) for x in buffer])
                buffer = []

        if buffer:
            main_outfile.writelines([",".join(x) for x in buffer])

        # Final summary, unless the last iteration already printed one.
        if counter % checkpoint_every != 0:
            report_progress()
Example #6
0
        mprob_file.write("{},{}\n".format(name, m_prob[name]["pct"]))

    mprob_file.close()


def write_cprob(c_prob, fname):
    """Write the conditional-probability table to ``fname`` as CSV text.

    The file starts with the header ``base.pokemon,cond.pokemon,true.prob``
    followed by one line per ``(base, cond)`` pair.

    Args:
        c_prob: Nested mapping ``{base_name: {cond_name: {"prob": value}}}``.
        fname: Path of the output file (created or truncated).
    """
    # The context manager guarantees the file is flushed and closed.
    with open(fname, 'w') as cprob_file:
        cprob_file.write("base.pokemon,cond.pokemon,true.prob\n")
        for base_name, cond_entries in c_prob.items():
            for cond_name, entry in cond_entries.items():
                cprob_file.write("{},{},{}\n".format(
                    base_name, cond_name, entry["prob"]))


if __name__ == "__main__":
    # Script entry point: the configuration object is handed to parse_args,
    # which yields the input files and their thresholds.
    gt_conf = GenerateTeamsConfig()

    args = parse_args(gt_conf)
    files = args.files
    thresholds = args.thresholds

    # Every file needs a threshold: a single threshold is repeated for all
    # files, any other length mismatch is rejected.
    if len(files) != len(thresholds):
        if len(thresholds) == 1:
            thresholds = thresholds * len(files)
        else:
            raise RuntimeError("Invalid File and Thresholds lengths")

    # Walk the files by index.
    idxs = range(len(files))
    teams = []
    for idx in idxs:
        filename = files[idx]
Example #7
0
def onehot_encode_data(file_names):
    """Encode team files as leave-one-out training matrices and save them.

    Each data line is split on commas; the last field is the line's weight
    and the remaining fields are the team members. For every member one row
    is produced: the other members are marked in a multi-hot vector, the
    held-out member's index is the label, and the line's weight is the row
    weight. Rows are shuffled before being split into the returned arrays.

    Args:
        file_names: Iterable of ``(data_file, unique_names_file)`` pairs.

    Returns:
        ``(onehot_mapping, data_mat, label_mat, weight_mat)``. The three
        arrays and the mapping (as JSON) are also written under
        ``config.TEAMS_MAT_DIR``.

    Raises:
        KeyError: If a team member is missing from the one-hot mapping.
    """
    m_conf = ModelConfig()
    # Read the team length once; it sizes the matrices and the encoding.
    team_length = GenerateTeamsConfig().teamLength

    data_fnames = [name_tuple[0] for name_tuple in file_names]
    uniq_fnames = [name_tuple[1] for name_tuple in file_names]

    onehot_mapping = gen_onehot_map(uniq_fnames)
    num_pokes = len(onehot_mapping)

    total_lines = sum(count_file_lines(fname) for fname in data_fnames)

    # One row per (line, held-out member); the last column of super_mat
    # holds the label.
    # NOTE(review): labels are stored as float16, which represents integers
    # exactly only up to 2048 — confirm the mapping stays below that.
    num_rows = total_lines * team_length
    super_mat = npy.zeros((num_rows, num_pokes + 1), dtype=npy.float16)
    weight_mat = npy.zeros((num_rows, 1), dtype=npy.int16)

    # Each teammate contributes an equal share. This was a hard-coded 1 / 5,
    # which is the same value for the usual team length of 6.
    teammate_share = 1 / max(team_length - 1, 1)

    idx = 0
    for fname in data_fnames:
        print("Reading {}".format(fname))
        # The context manager closes each input file once it has been read.
        with open(fname) as file_i:
            for raw_line in file_i:
                line = raw_line.strip()
                if not line:
                    # As before, the first blank line ends this file's data.
                    break

                *team, weight = line.split(",")

                for poke_idx, held_out in enumerate(team):
                    teammates = team[:poke_idx] + team[(poke_idx + 1):]

                    line_encode = npy.zeros(num_pokes)
                    for poke in teammates:
                        line_encode[onehot_mapping[poke]] = teammate_share

                    super_mat[idx, :] = npy.concatenate(
                        [line_encode, [onehot_mapping[held_out]]])
                    # Store a float scalar, as the original did, instead of
                    # assigning a 1-element array into a scalar slot.
                    weight_mat[idx, 0] = float(int(weight))
                    idx += 1

    super_duper_mat = npy.hstack((super_mat, weight_mat))
    # Shuffles the rows in place, keeping data, label and weight aligned.
    npy.random.shuffle(super_duper_mat)

    data_mat = super_duper_mat[:, 0:num_pokes]
    label_mat = super_duper_mat[:, num_pokes]
    weight_mat = super_duper_mat[:, num_pokes + 1]

    # Save the files locally
    matrix_datafile = join(config.TEAMS_MAT_DIR, m_conf.matrixDataFile)
    label_datafile = join(config.TEAMS_MAT_DIR, m_conf.matrixLabelFile)
    weights_datafile = join(config.TEAMS_MAT_DIR, m_conf.matrixWeightFile)
    onehot_outfile = join(config.TEAMS_MAT_DIR,
                          "{}.json".format(m_conf.onehotFile))

    print("Saving Data...")
    npy.save(matrix_datafile, data_mat)
    npy.save(label_datafile, label_mat)
    npy.save(weights_datafile, weight_mat)
    with open(onehot_outfile, 'w') as doot:
        json.dump(onehot_mapping, doot)

    return onehot_mapping, data_mat, label_mat, weight_mat