def prepare_test_data(onehot_mapping):
    # Encode the held-out test teams with the same leave-one-out scheme as the
    # training data: for each team member, the remaining members form the
    # (normalized) one-hot feature row and the held-out member is the label.
    test_teams_file = join(config.TEAMS_TXT_DIR, TestTeamConfig().out_file)
    teams = []
    with open(test_teams_file) as in_file:
        teams.extend([x.strip() for x in in_file.readlines()])

    num_pokes = len(onehot_mapping.keys())
    valid_teams = []
    for team_str in teams:
        team = team_str.split(",")
        # Drop the trailing count column
        team = team[:-1]
        for poke_idx in range(len(team)):
            num_invalid_pokes = 0
            temp_arr = team[:poke_idx] + team[(poke_idx + 1):]
            line_encode = npy.zeros(num_pokes)
            for poke in temp_arr:
                if poke in onehot_mapping.keys():
                    line_encode[onehot_mapping[poke]] = 1
                else:
                    num_invalid_pokes += 1
            result_encode = npy.zeros(1)
            # Unknown Pokemon map to an out-of-vocabulary index
            result_encode[0] = onehot_mapping.get(team[poke_idx],
                                                  len(onehot_mapping))
            # Skip rows where every context Pokemon is unknown
            if num_invalid_pokes >= GenerateTeamsConfig().teamLength - 1:
                continue
            line_encode = [
                x / (GenerateTeamsConfig().teamLength - 1 - num_invalid_pokes)
                for x in line_encode
            ]
            valid_teams.append(npy.concatenate([line_encode, result_encode]))

    super_mat = npy.zeros((len(valid_teams), num_pokes + 1), dtype=npy.float16)
    for idx, row in enumerate(valid_teams):
        super_mat[idx, :] = row
    data_mat = super_mat[:, 0:num_pokes]
    label_mat = super_mat[:, num_pokes]
    return data_mat, label_mat
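
# Illustrative sketch (not part of the pipeline, never called): shows how the
# leave-one-out encoding above behaves for a single toy team. The mapping and
# team names below are made up for demonstration; only numpy is assumed.
def _demo_leave_one_out_encoding():
    import numpy as npy  # local import so the sketch stays self-contained

    onehot_mapping = {"pikachu": 0, "charizard": 1, "snorlax": 2}
    team = ["pikachu", "charizard", "missingno"]  # "missingno" is unknown

    num_pokes = len(onehot_mapping)
    rows = []
    for poke_idx in range(len(team)):
        context = team[:poke_idx] + team[poke_idx + 1:]
        line_encode = npy.zeros(num_pokes)
        num_invalid = 0
        for poke in context:
            if poke in onehot_mapping:
                line_encode[onehot_mapping[poke]] = 1
            else:
                num_invalid += 1
        # Normalize by the number of known context members, as above
        line_encode /= max(len(context) - num_invalid, 1)
        label = onehot_mapping.get(team[poke_idx], len(onehot_mapping))
        rows.append((line_encode, label))
    return rows
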
def beam_step(base_teams, names, m_prob, c_prob):
    # Expand every partial team by one Pokemon, then keep only the
    # highest-probability candidates (the beam).
    new_teams_set = set()
    for team in base_teams:
        for name in names:
            cand_new_team = team + [name]
            # Filter out invalid team combinations
            if team_validity_check(cand_new_team) is False:
                continue
            # Sort alphabetically to avoid duplications
            cand_new_team.sort()
            new_teams_set.add("|".join(cand_new_team))
    new_teams = [x.split("|") for x in new_teams_set]
    new_teams_prob = [{
        "team": x,
        "prob": calculate_team_prob(x, m_prob, c_prob)
    } for x in new_teams]
    new_teams_prob.sort(key=lambda k: k["prob"], reverse=True)
    actual_teams = new_teams_prob[0:GenerateTeamsConfig().beamSearchThreshold]
    return ([x["team"] for x in actual_teams],
            sum([x["prob"] for x in actual_teams]))
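
# Minimal self-contained sketch of the beam-pruning idea used above: expand
# candidates, score them, and keep only the top-k. The scoring function and
# beam width are placeholders, not the project's real probability model, and
# the membership check stands in for team_validity_check.
# Example: _demo_beam_prune([["pikachu"], ["ditto"]], ["snorlax", "mew"],
#                           lambda team: sum(len(p) for p in team))
def _demo_beam_prune(partial_teams, names, score_fn, beam_width=3):
    candidates = set()
    for team in partial_teams:
        for name in names:
            if name in team:
                continue  # stand-in for team_validity_check
            candidates.add("|".join(sorted(team + [name])))
    scored = sorted(((score_fn(c.split("|")), c.split("|"))
                     for c in candidates),
                    reverse=True)
    return [team for _, team in scored[:beam_width]]
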
def get_filenames():
    # Read the filenames from the config
    gt_conf = GenerateTeamsConfig()
    m_conf = ModelConfig()
    output = []
    for suff in m_conf.targetFilesSuffix:
        out_filename = "{}_{}".format(gt_conf.outputFilenamePrefix, suff)
        uniq_filename = "{}_{}".format(gt_conf.uniqueFilenamePrefix, suff)
        output.append((join(config.TEAMS_TXT_DIR, out_filename),
                       join(config.TEAMS_TXT_DIR, uniq_filename)))
    return output
def generate_teams_beam(info, m_prob, c_prob):
    # Build teams one member at a time with a beam search over the marginal
    # and conditional probabilities.
    names = m_prob.keys()
    base_teams = [[x] for x in m_prob.keys()]
    start_time = time()
    counter = 1
    team_probs = None
    while counter < GenerateTeamsConfig().teamLength:
        step_time = time()
        base_teams, team_probs = beam_step(base_teams, names, m_prob, c_prob)
        gc.collect()
        print("Beam Step: {} ({}) | Time Elapsed {:.02f}s ({:.02f}s)".format(
            counter, len(base_teams),
            time() - step_time,
            time() - start_time))
        counter += 1
    print("Time Spent Generating Teams: {:.02f}s".format(time() - start_time))
    return base_teams, team_probs
def generate_teams(info, m_prob, c_prob, out_filename):
    total_teams = round(info["ability_total"])
    names = m_prob.keys()
    main_outfile = open(out_filename, "w")

    num_to_search = 0
    team_combinations = None
    total_prob = None
    if GenerateTeamsConfig().method == "comb":
        # Exhaustively enumerate every possible team of teamLength Pokemon
        team_combinations = combinations(names,
                                         GenerateTeamsConfig().teamLength)
        num_to_search = factorial(len(names)) / (
            factorial(GenerateTeamsConfig().teamLength) *
            factorial(len(names) - GenerateTeamsConfig().teamLength))
        total_prob = 1
    elif GenerateTeamsConfig().method == "beam":
        team_combinations, total_prob = generate_teams_beam(
            info, m_prob, c_prob)
        num_to_search = len(team_combinations)
    else:
        raise RuntimeError(
            "Invalid Method (either 'comb' or 'beam' expected): {}".format(
                GenerateTeamsConfig().method))

    print(
        "Number of Teams to Search: {num} | Total Teams: {tot} | Total Prob: {prob}"
        .format(num=num_to_search, tot=total_teams, prob=total_prob))

    counter = 0
    buffer = []
    pct_chosen = 0
    num_chosen = 0
    for team in team_combinations:
        counter += 1
        team = list(team)
        if team_validity_check(team) is False:
            continue
        team_prob = calculate_team_prob(team, m_prob, c_prob) / total_prob
        team_appearances = sample_team(total_teams - num_chosen, team_prob,
                                       pct_chosen)
        # team_appearances = round(total_teams * team_prob)
        if team_appearances > 0:
            num_chosen += team_appearances
            pct_chosen += team_prob
            buffer.append(team + ["{}\n".format(str(team_appearances))])
        if counter % GenerateTeamsConfig().checkpointIteration == 0:
            print("Iteration: {}".format(counter),
                  "| Total Probability Captured: {}".format(pct_chosen),
                  "| Num Teams Generated: {}".format(num_chosen))
        # Flush the write buffer once it fills up
        if len(buffer) >= GenerateTeamsConfig().bufferSize:
            main_outfile.writelines([",".join(x) for x in buffer])
            buffer = []

    if buffer:
        main_outfile.writelines([",".join(x) for x in buffer])
    if counter % GenerateTeamsConfig().checkpointIteration != 0:
        print("Iteration: {}".format(counter),
              "| Total Probability Captured: {}".format(pct_chosen),
              "| Num Teams Generated: {}".format(num_chosen))
    main_outfile.close()
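
# Quick sanity-check sketch for the "comb" branch above: the factorial
# expression used for num_to_search is just the binomial coefficient C(n, k),
# which math.comb computes directly. Purely illustrative; the default numbers
# below are made up.
def _demo_comb_count(num_names=50, team_length=6):
    from math import comb, factorial
    via_factorials = factorial(num_names) // (
        factorial(team_length) * factorial(num_names - team_length))
    assert via_factorials == comb(num_names, team_length)
    return via_factorials  # 15,890,700 teams for 50 names, teams of 6
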
        mprob_file.write("{},{}\n".format(name, m_prob[name]["pct"]))
    mprob_file.close()


def write_cprob(c_prob, fname):
    # Dump the conditional probabilities as a CSV
    cprob_file = open(fname, 'w')
    cprob_file.write("base.pokemon,cond.pokemon,true.prob\n")
    for base_name in c_prob.keys():
        for cond_name in c_prob[base_name].keys():
            cprob_file.write("{},{},{}\n".format(
                base_name, cond_name, c_prob[base_name][cond_name]["prob"]))
    cprob_file.close()


if __name__ == "__main__":
    gt_conf = GenerateTeamsConfig()
    args = parse_args(gt_conf)
    files = args.files
    thresholds = args.thresholds
    if len(files) != len(thresholds):
        if len(thresholds) == 1:
            # A single threshold applies to every input file
            thresholds = thresholds * len(files)
        else:
            raise RuntimeError("Invalid File and Thresholds lengths")
    idxs = range(len(files))
    teams = []
    for idx in idxs:
        filename = files[idx]
def onehot_encode_data(file_names):
    # One-hot encode every generated team file using a leave-one-out scheme:
    # each team member becomes a label, the remaining members form the feature
    # row, and the team's generated count becomes the sample weight.
    m_conf = ModelConfig()
    data_fnames = []
    uniq_fnames = []
    for name_tuple in file_names:
        data_fnames.append(name_tuple[0])
        uniq_fnames.append(name_tuple[1])

    onehot_mapping = gen_onehot_map(uniq_fnames)
    num_pokes = len(onehot_mapping.keys())
    total_lines = 0
    for fname in data_fnames:
        total_lines += count_file_lines(fname)

    super_mat = npy.zeros(
        (total_lines * GenerateTeamsConfig().teamLength, num_pokes + 1),
        dtype=npy.float16)
    weight_mat = npy.zeros((total_lines * GenerateTeamsConfig().teamLength, 1),
                           dtype=npy.int16)

    idx = 0
    for fname in data_fnames:
        print("Reading {}".format(fname))
        with open(fname) as file_i:
            while True:
                line = file_i.readline().strip()
                if not line:
                    break
                team = line.split(",")
                weight = team[-1]
                team = team[:-1]
                for poke_idx in range(len(team)):
                    temp_arr = team[:poke_idx] + team[(poke_idx + 1):]
                    line_encode = npy.zeros(num_pokes)
                    for poke in temp_arr:
                        # Each context member contributes equally
                        line_encode[onehot_mapping[poke]] = 1 / (
                            GenerateTeamsConfig().teamLength - 1)
                    result_encode = npy.zeros(1)
                    result_encode[0] = onehot_mapping[team[poke_idx]]
                    super_mat[idx, :] = npy.concatenate(
                        [line_encode, result_encode])
                    weight_mat[idx, 0] = int(weight)
                    idx += 1

    # Shuffle features, labels, and weights together, then split them back out
    super_duper_mat = npy.hstack((super_mat, weight_mat))
    npy.random.shuffle(super_duper_mat)
    data_mat = super_duper_mat[:, 0:num_pokes]
    label_mat = super_duper_mat[:, num_pokes]
    weight_mat = super_duper_mat[:, num_pokes + 1]

    # Save the files locally
    matrix_datafile = join(config.TEAMS_MAT_DIR, m_conf.matrixDataFile)
    label_datafile = join(config.TEAMS_MAT_DIR, m_conf.matrixLabelFile)
    weights_datafile = join(config.TEAMS_MAT_DIR, m_conf.matrixWeightFile)
    onehot_outfile = join(config.TEAMS_MAT_DIR,
                          "{}.json".format(m_conf.onehotFile))
    print("Saving Data...")
    npy.save(matrix_datafile, data_mat)
    npy.save(label_datafile, label_mat)
    npy.save(weights_datafile, weight_mat)
    with open(onehot_outfile, 'w') as doot:
        json.dump(onehot_mapping, doot)
    return onehot_mapping, data_mat, label_mat, weight_mat
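
# Hedged usage sketch (never called here): how the matrices and mapping saved
# by onehot_encode_data might be loaded back for training or inspection. Paths
# come from the same config objects used above; only numpy and json are
# otherwise assumed. Note that numpy.save appends ".npy" when the target name
# lacks that extension, so the suffix handling below mirrors that behaviour.
def _demo_load_encoded_data():
    def _with_npy(path):
        return path if path.endswith(".npy") else "{}.npy".format(path)

    m_conf = ModelConfig()
    data_path = join(config.TEAMS_MAT_DIR, m_conf.matrixDataFile)
    label_path = join(config.TEAMS_MAT_DIR, m_conf.matrixLabelFile)
    weight_path = join(config.TEAMS_MAT_DIR, m_conf.matrixWeightFile)
    onehot_path = join(config.TEAMS_MAT_DIR,
                       "{}.json".format(m_conf.onehotFile))

    data_mat = npy.load(_with_npy(data_path))
    label_mat = npy.load(_with_npy(label_path))
    weight_mat = npy.load(_with_npy(weight_path))
    with open(onehot_path) as in_file:
        onehot_mapping = json.load(in_file)
    return onehot_mapping, data_mat, label_mat, weight_mat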