def replaceability(nash_att, nash_def, payoffmatrix_def, payoffmatrix_att, child_partition):
    """
    Calculate the replaceability of heuristics.

    Builds 0/1 NE-support indicators for both players, computes both players'
    NE payoffs, and each player's per-strategy deviation payoff against the
    opponent's NE; then masks each heuristic's own strategy block with a large
    negative value.

    NOTE(review): `rep` and `pos_to_method` are initialized but never used and
    nothing is returned — this function looks unfinished; confirm against the
    intended design before relying on it.

    :param nash_att: attacker NE mixed strategy over the combined game.
    :param nash_def: defender NE mixed strategy over the combined game
                     (assumes broadcasting-compatible shape, e.g. a column
                     vector for the axis=0 sums below — TODO confirm caller).
    :param payoffmatrix_def: defender payoff matrix of the combined game.
    :param payoffmatrix_att: attacker payoff matrix of the combined game.
    :param child_partition: {method_name: number_of_strategies}.
    :return: None (see NOTE above).
    """
    rep = {}
    positions = find_heuristic_position(child_partition)
    # BUGFIX: dict.iteritems() is Python 2 only; the rest of this file is
    # Python 3 (print() function calls), so use items() instead.
    pos_to_method = {y: x for x, y in positions.items()}
    # 0/1 indicators of each player's NE support.
    nash_indicator_att = nash_att.copy()
    nash_indicator_att[nash_indicator_att > 0] = 1
    nash_indicator_def = nash_def.copy()
    nash_indicator_def[nash_indicator_def > 0] = 1
    # Expected NE payoff of each player.
    dPayoff = np.round(np.sum(nash_def * payoffmatrix_def * nash_att), decimals=2)
    aPayoff = np.round(np.sum(nash_def * payoffmatrix_att * nash_att), decimals=2)
    # Per-strategy payoff of each player against the opponent's NE.
    utils_def = np.round(np.sum(payoffmatrix_def * nash_att, axis=1), decimals=2)
    utils_att = np.round(np.sum(nash_def * payoffmatrix_att, axis=0), decimals=2)
    utils_def = np.reshape(utils_def, newshape=np.shape(utils_att))
    # Mask out every heuristic's own strategy block.
    # NOTE(review): looping over ALL methods masks the entire vector since the
    # partition covers all strategies — confirm this is intended.
    for method in child_partition:
        start, end = positions[method]
        utils_def[start:end] = -10000
        utils_att[start:end] = -10000
def regret_curves(payoffmatrix_def, payoffmatrix_att, child_partition):
    """
    Calculate the regret (epsilon) curve of each subgame as it grows.

    For each heuristic block, solves the NE of every leading principal
    subgame (the first 1..k strategies of the block) and records each
    player's regret of that NE against deviations outside the subgame.

    :param payoffmatrix_def: defender payoff matrix of the combined game.
    :param payoffmatrix_att: attacker payoff matrix of the combined game.
    :param child_partition: {method_name: number_of_strategies}.
    :return: (curves_att, curves_def) dicts mapping method -> list of regrets.
    """
    curves_att = {}
    curves_def = {}
    num_str, _ = np.shape(payoffmatrix_att)
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        curves_att[method] = []
        curves_def[method] = []
        start, end = positions[method]
        # Block-vs-full-game payoff slices.
        # NOTE(review): payoff_vect_att below is built from the DEFENDER
        # matrix slice (and payoff_vect_def from the attacker slice) — this
        # looks swapped; confirm against the derivation before trusting the
        # resulting curves.
        submatrix_def = payoffmatrix_def[start:end, :]
        submatrix_att = payoffmatrix_att[:, start:end]
        subgame_def = payoffmatrix_def[start:end, start:end]
        subgame_att = payoffmatrix_att[start:end, start:end]
        width = end - start
        # BUGFIX: iterate 1..width. The old `np.arange(end)` started at 0
        # (an empty subgame, which made the gambit solver fail with
        # "Expecting outcome or payoff") and overran the block when start > 0.
        for epoch in np.arange(1, width + 1):
            subsubgame_def = subgame_def[:epoch, :epoch]
            subsubgame_att = subgame_att[:epoch, :epoch]
            nash_att, nash_def = do_gambit_analysis(subsubgame_def, subsubgame_att,
                                                    maxent=False, minent=True)
            # BUGFIX: pad the subgame NE with zeros up to the block width.
            # The old `zeros[len(nash_def)] + nash_def` indexed a single
            # scalar (always 0.0) and therefore never padded anything.
            padded_def = np.zeros(width)
            padded_att = np.zeros(width)
            padded_def[:len(nash_def)] = nash_def
            padded_att[:len(nash_att)] = nash_att
            nash_att = padded_att
            nash_def = np.reshape(padded_def, newshape=(width, 1))
            # Deviation payoff vectors over the whole combined game.
            payoff_vect_att = np.sum(nash_def * submatrix_def, axis=0)
            payoff_vect_def = np.sum(submatrix_att * nash_att, axis=1)
            # BUGFIX: removed the old
            #   payoffmatrix_def = np.reshape(payoffmatrix_def, np.shape(payoff_vect_att))
            # which tried to squeeze the full matrix into a vector shape
            # (ValueError) and clobbered the input parameter.
            nash_payoff_att = np.round(np.sum(nash_def * subgame_att * nash_att), decimals=2)
            nash_payoff_def = np.round(np.sum(nash_def * subgame_def * nash_att), decimals=2)
            deviation_att = np.max(payoff_vect_att)
            deviation_def = np.max(payoff_vect_def)
            regret_att = np.maximum(deviation_att - nash_payoff_att, 0)
            regret_def = np.maximum(deviation_def - nash_payoff_def, 0)
            curves_att[method].append(regret_att)
            curves_def[method].append(regret_def)
    return curves_att, curves_def
def eligibility_trace(nasheq_dict, child_partition, gamma=0.7):
    """
    Calculate the eligibility trace of strategies based on game.nasheq.

    A strategy that appears in a NE with probability above a small threshold
    receives a bonus of -2 at that epoch; traces decay by `gamma` each epoch.
    The traces are then pushed into priority queues (smallest first), so
    strategies that are frequently/recently in a NE come out first.

    :param nasheq_dict: {"baselines": game.nasheq}, where game.nasheq maps
                        epoch -> (defender NE array, attacker NE array).
    :param child_partition: {method_name: number_of_strategies}.
    :param gamma: decay factor of the trace.
    :return: (pq_def, pq_att) dicts of priority queues keyed by method.
    """
    position = find_heuristic_position(child_partition)
    et_dict_def = {}
    et_dict_att = {}
    nash_thred = 0.05
    # Construct eligibility traces.
    for method in nasheq_dict:
        et_dict_def[method] = np.zeros(child_partition[method])
        et_dict_att[method] = np.zeros(child_partition[method])
        nasheq = nasheq_dict[method]
        for epoch in np.arange(1, child_partition[method] + 1):
            et_dict_att[method] *= gamma
            # BUGFIX: numpy slicing returns a VIEW, so the old in-place
            # thresholding corrupted the caller's nasheq data. Copy first.
            ne_att = nasheq[epoch][1][1:].copy()
            ne_att[ne_att <= nash_thred] = 0
            ne_att[ne_att > nash_thred] = -2
            et_dict_att[method][:len(ne_att)] += ne_att
            et_dict_def[method] *= gamma
            ne_def = nasheq[epoch][0][1:].copy()
            ne_def[ne_def <= nash_thred] = 0
            ne_def[ne_def > nash_thred] = -2
            et_dict_def[method][:len(ne_def)] += ne_def
    # Put strategies into queues with the eligibility trace as priority.
    pq_def = {}
    pq_att = {}
    for method in et_dict_def:
        pq_def[method] = pq()
        pq_att[method] = pq()
        start, end = position[method]
        # Global (combined-game) indices of this method's strategies.
        idx_str = start + np.arange(child_partition[method])
        for pair in zip(et_dict_def[method], idx_str):
            pq_def[method].put(pair)
        for pair in zip(et_dict_att[method], idx_str):
            pq_att[method].put(pair)
    return pq_def, pq_att
def formal_regret_curves(payoffmatrix_def, payoffmatrix_att, child_partition,
                         num_epochs=40, skip_methods=('RM',)):
    """
    Calculate the regret curve of each heuristic against the combined game.

    For epoch k, solves the NE of the first (k+1)x(k+1) subgame of each
    heuristic block and measures both players' regret of that NE against
    best deviations over the FULL combined game.

    :param payoffmatrix_def: defender payoff matrix of the combined game.
    :param payoffmatrix_att: attacker payoff matrix of the combined game.
    :param child_partition: {method_name: number_of_strategies}.
    :param num_epochs: number of subgame sizes to evaluate
                       (generalized from the hard-coded 40).
    :param skip_methods: methods to exclude
                         (generalized from the hard-coded 'RM' skip).
    :return: (curves_dict_def, curves_dict_att) method -> list of regrets.
    """
    positions = find_heuristic_position(child_partition)
    curves_dict_def = {method: [] for method in child_partition}
    curves_dict_att = {method: [] for method in child_partition}
    for epoch in np.arange(num_epochs):
        for method in child_partition:
            if method in skip_methods:
                continue
            start, end = positions[method]
            stop = start + epoch + 1
            submatrix_att = payoffmatrix_att[start:stop, start:stop]
            submatrix_def = payoffmatrix_def[start:stop, start:stop]
            nash_att, nash_def = do_gambit_analysis(submatrix_def, submatrix_att,
                                                    maxent=True)
            # Column vector so broadcasting weights the defender's rows.
            nash_def = np.reshape(nash_def, newshape=(len(nash_def), 1))
            ne_payoff_def = np.sum(nash_def * submatrix_def * nash_att)
            ne_payoff_att = np.sum(nash_def * submatrix_att * nash_att)
            # Best deviation payoff against the subgame NE, taken over ALL
            # strategies of the combined game.
            dev_def = np.max(np.sum(payoffmatrix_def[:, start:stop] * nash_att, axis=1))
            dev_att = np.max(np.sum(nash_def * payoffmatrix_att[start:stop, :], axis=0))
            curves_dict_def[method].append(np.maximum(dev_def - ne_payoff_def, 0))
            curves_dict_att[method].append(np.maximum(dev_att - ne_payoff_att, 0))
    return curves_dict_def, curves_dict_att
def NE_regret(regret_vect_att, regret_vect_def, payoffmatrix_att, payoffmatrix_def, child_partition):
    """
    Average combined-game regret of each heuristic's own NE strategies.

    For every heuristic block, solve the NE of that block's subgame, then
    average the combined-game regret vector over the strategies in the
    block's NE support (defender and attacker separately).

    :param regret_vect_att: attacker regret vector from the combined game.
    :param regret_vect_def: defender regret vector from the combined game.
    :param payoffmatrix_att: attacker payoff matrix of the combined game.
    :param payoffmatrix_def: defender payoff matrix of the combined game.
    :param child_partition: {method_name: number_of_strategies}.
    :return: {method: {0: avg defender regret, 1: avg attacker regret}}
    """
    positions = find_heuristic_position(child_partition)
    regret_dict = {}
    for method in child_partition:
        start, end = positions[method]
        print(start, end)
        block_def = payoffmatrix_def[start:end, start:end]
        block_att = payoffmatrix_att[start:end, start:end]
        ne_att, ne_def = do_gambit_analysis(block_def, block_att, maxent=True)
        # Turn the mixed strategies into 0/1 support indicators.
        support_att = np.where(ne_att > 0, 1, ne_att)
        support_def = np.where(ne_def > 0, 1, ne_def)
        avg_def = np.sum(regret_vect_def[start:end] * support_def) / np.sum(support_def)
        avg_att = np.sum(regret_vect_att[start:end] * support_att) / np.sum(support_att)
        regret_dict[method] = {0: avg_def, 1: avg_att}
    return regret_dict
def regret_fixed_matrix(payoffmatrix_def, payoffmatrix_att, child_partition, block_size=32):
    """
    Print both players' regret of each heuristic's fixed-size subgame NE,
    measured against best deviations over the full combined game.

    :param payoffmatrix_def: defender payoff matrix of the combined game.
    :param payoffmatrix_att: attacker payoff matrix of the combined game.
    :param child_partition: {method_name: number_of_strategies}.
    :param block_size: number of strategies per heuristic used for the
                       subgame (generalized from the hard-coded 32).
    :return: None; results are printed.
    """
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        start, end = positions[method]
        print(start, end)
        stop = start + block_size
        submatrix_att = payoffmatrix_att[start:stop, start:stop]
        submatrix_def = payoffmatrix_def[start:stop, start:stop]
        nash_att, nash_def = do_gambit_analysis(submatrix_def, submatrix_att, maxent=True)
        # Column vector so broadcasting weights the defender's rows.
        nash_def = np.reshape(nash_def, newshape=(len(nash_def), 1))
        ne_payoff_def = np.sum(nash_def * submatrix_def * nash_att)
        ne_payoff_att = np.sum(nash_def * submatrix_att * nash_att)
        # Best deviation payoff over ALL strategies of the combined game.
        dev_def = np.max(np.sum(payoffmatrix_def[:, start:stop] * nash_att, axis=1))
        dev_att = np.max(np.sum(nash_def * payoffmatrix_att[start:stop, :], axis=0))
        print('------------------------------------------')
        print("The current method is ", method)
        print("The defender's regret is", np.maximum(dev_def - ne_payoff_def, 0))
        print("The attacker's regret is", np.maximum(dev_att - ne_payoff_att, 0))
        print("==================================================")
def ne_search_wo_etrace(payoff_matrix_def, payoff_matrix_att, child_partition):
    """
    Search for a NE of the combined game by iterative best response,
    without the eligibility trace.

    Starts from the NE of the first heuristic's subgame, then repeatedly adds
    both players' best-response strategies (computed over the whole combined
    game) to the restricted strategy sets and re-solves the restricted game,
    until neither player has a profitable deviation.

    :param payoff_matrix_def: defender payoff matrix of the combined game.
    :param payoff_matrix_att: attacker payoff matrix of the combined game.
    :param child_partition: {method_name: number_of_strategies}.
    :return: (nash_def, nash_att, indicator_matrix) — full-length NE vectors
             and a matrix marking which payoff cells were touched
             (heuristic subgame blocks are marked 5 at the end).
    """
    position = find_heuristic_position(child_partition)
    total_num_str = 0
    init_flag = False
    # Assume 2 methods. Find a candidate NE in the first subgame.
    for method in child_partition:
        if not init_flag:
            n = child_partition[method]
            nash_att, nash_def = do_gambit_analysis(
                payoff_matrix_def[:n, :n],
                payoff_matrix_att[:n, :n],
                maxent=False, minent=False)
            # Strategy sets of the current restricted game.
            strategy_set_def = list(range(n))
            strategy_set_att = list(range(n))
            init_flag = True
        total_num_str += child_partition[method]
    # Extend the NE to the length of the combined game.
    zeros_def = np.zeros(total_num_str)
    zeros_att = np.zeros(total_num_str)
    zeros_def[:len(nash_def)] = nash_def
    # BUGFIX: pad the attacker vector by its OWN length (was len(nash_def)).
    zeros_att[:len(nash_att)] = nash_att
    nash_def = zeros_def
    nash_att = zeros_att
    # indicator_matrix records which cells have been simulated already.
    indicator_matrix = np.zeros((total_num_str, total_num_str))
    for method in position:
        start, end = position[method]
        indicator_matrix[start:end, start:end] = 1
    nash_def_T = np.reshape(nash_def, newshape=(len(nash_def), 1))
    payoff_def = np.sum(nash_def_T * payoff_matrix_def * nash_att)
    payoff_att = np.sum(nash_def_T * payoff_matrix_att * nash_att)
    support_idx_def = np.where(nash_def > 0)[0]
    support_idx_att = np.where(nash_att > 0)[0]
    # Change to simulation mode when simulation is needed.
    while True:
        # Cells covered by the NE support count as simulated.
        for x in support_idx_def:
            indicator_matrix[x, :] = 1
        for y in support_idx_att:
            indicator_matrix[:, y] = 1
        # Best-response payoffs against the current NE, over the full game.
        # BUGFIX: compute each deviation vector once and take max/argmax from
        # the SAME vector. The old code recomputed the sums and built the
        # attacker's argmax from 1-D `nash_def`, whose broadcasting weighted
        # columns instead of rows, so dev_att could disagree with
        # dev_payoff_att.
        dev_vect_def = np.sum(payoff_matrix_def * nash_att, axis=1)
        dev_vect_att = np.sum(nash_def_T * payoff_matrix_att, axis=0)
        dev_payoff_def = np.max(dev_vect_def)
        dev_payoff_att = np.max(dev_vect_att)
        dev_def = np.argmax(dev_vect_def)
        dev_att = np.argmax(dev_vect_att)
        # Converged: no profitable deviation for either player.
        if dev_payoff_def <= payoff_def and dev_payoff_att <= payoff_att:
            break
        # Add both best responses to the restricted game. (The original
        # if/else branches were byte-identical, i.e. both strategies were
        # added unconditionally; that behavior is preserved.)
        strategy_set_def.append(dev_def)
        strategy_set_def.sort()
        indicator_matrix[dev_def, :] = 1
        strategy_set_att.append(dev_att)
        strategy_set_att.sort()
        indicator_matrix[:, dev_att] = 1
        # Re-solve the enlarged restricted game.
        subgame_def = es(strategy_set_def, strategy_set_att, payoff_matrix_def)
        subgame_att = es(strategy_set_def, strategy_set_att, payoff_matrix_att)
        nash_att, nash_def = do_gambit_analysis(subgame_def, subgame_att,
                                                maxent=False, minent=False)
        sub_nash_def_T = np.reshape(nash_def, newshape=(len(nash_def), 1))
        payoff_def = np.sum(sub_nash_def_T * subgame_def * nash_att)
        payoff_att = np.sum(sub_nash_def_T * subgame_att * nash_att)
        # Scatter the restricted-game NE back into combined-game vectors.
        zeros_def = np.zeros(total_num_str)
        zeros_att = np.zeros(total_num_str)
        for pos, value in zip(strategy_set_att, nash_att):
            zeros_att[pos] = value
        for pos, value in zip(strategy_set_def, nash_def):
            zeros_def[pos] = value
        nash_def = zeros_def
        nash_att = zeros_att
        # BUGFIX: refresh the full-length column vector for the next
        # iteration. The old code left nash_def_T at the restricted-game
        # length, which breaks broadcasting against the full attacker matrix.
        nash_def_T = np.reshape(nash_def, newshape=(len(nash_def), 1))
        support_idx_def = np.where(nash_def > 0)[0]
        support_idx_att = np.where(nash_att > 0)[0]
    # Heuristic subgame blocks are marked 5.
    for method in position:
        start, end = position[method]
        indicator_matrix[start:end, start:end] = 5
    return nash_def, nash_att, indicator_matrix