def run_selector(self, H_in, nodes, unkn_plus_mask, plus_mask,
                 unkn_unab_mask, oracle, curr_out):
    # cells that are '+', unknown-'+', or unestimatable carry no usable value;
    # overwrite them with a huge constant so a row-wise argmin never picks them
    non_value_mask = 1 * ((plus_mask + unkn_unab_mask + unkn_plus_mask) > 0)
    H = H_in.copy() * (1 - non_value_mask) + 1e10 * non_value_mask
    H, unkn_plus_mask, plus_mask, unkn_unab_mask = self.remove_H_non_value_rows(
        H, unkn_plus_mask, plus_mask, unkn_unab_mask)
    # formatter.print_unformated_mat(H, unkn_plus_mask, plus_mask, unkn_unab_mask)
    formatter.printt('\tExploit vs. Explore\n', self.log)
    num_select = self.out_lim - self.num_rand
    rand_nodes = []
    selected = self.subset_exploiter.select_subset_peer(
        H, nodes, num_select, plus_mask, oracle, curr_out)
    # selected = self.count_exploiter.select_best_peer(H, nodes, num_select, plus_mask, oracle, curr_out)
    if len(selected) != num_select:
        # selected = self.count_exploiter.select_best_peer(H, nodes, num_select, plus_mask, oracle)
        # if len(selected) != num_select:
        rand_nodes = self.draw_random_peers(nodes,
                                            num_select - len(selected),
                                            oracle)
    exploits = selected + rand_nodes
    explore_nodes = self.explorer.get_exploring_peers(
        nodes, exploits, self.num_rand, oracle)
    self.state = selected + rand_nodes + explore_nodes
    return exploits, explore_nodes
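# A minimal standalone sketch (toy values, hypothetical shapes) of the masking
# trick used above: non-value cells are overwritten with 1e10 so that a
# row-wise argmin can only land on real observations.
def _demo_mask_non_values():
    import numpy as np
    H_in = np.array([[3.0, 1.0, 2.0],
                     [5.0, 4.0, 6.0]])
    # suppose column 1 of row 0 is a '+' cell and carries no usable value
    non_value_mask = np.array([[0, 1, 0],
                               [0, 0, 0]])
    H = H_in * (1 - non_value_mask) + 1e10 * non_value_mask
    # row 0 now resolves to column 2 (value 2.0), not the masked column 1
    assert np.argmin(H[0]) == 2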
def select_applicable_subset(i, oracle, sorted_subset, curr_out, log):
    selected = []
    for subset in sorted_subset:
        comb = subset[0]
        # only ask the oracle about nodes that are not already outgoing connections
        nodes = [j for j in comb if j not in curr_out]
        un = oracle.can_i_connect(i, nodes)
        if len(un) == 0:
            selected = list(comb)
            break
        formatter.printt('\t\tsubset skipped: cannot conn {}\n'.format(un), log)
    return selected
def log_epoch(self, e, curr_outs, churn_stars):
    topo_text = ""
    for star_i in self.stars:
        star_i_text = "\t\t {} outs {}\n".format(star_i, curr_outs[star_i])
        topo_text += star_i_text
    for star_i in self.stars:
        epoch_text = "\n\n\t\t\t*** Epoch {e} churn stars {stars} ***\n".format(
            e=e, stars=churn_stars)
        epoch_text += topo_text
        epoch_text += '\n'
        formatter.printt(epoch_text, self.log_files[star_i])
def custom_update_loop(self, X, easy_estimate_by_row, known_pos_by_row,
                       element_peer_scores, tensor_ind_map, T, num_var):
    A = torch.rand(num_var, dtype=torch.float32)
    C = torch.rand(T, requires_grad=True, dtype=torch.float32)
    A.requires_grad_()
    relu = torch.nn.ReLU()
    criterion = ElementLoss()
    s_time = time.time()
    for e in range(self.epochs):
        loss = criterion(X, A, C, easy_estimate_by_row, known_pos_by_row,
                         element_peer_scores, tensor_ind_map, T)
        loss.backward()
        with torch.no_grad():
            num_loss = loss.item()
            if e % 100 == 0:
                formatter.printt(
                    '{} normalized loss {} in {}\n'.format(
                        e, num_loss, round(time.time() - s_time, 2)),
                    self.log_file)
                s_time = time.time()
            if C.isnan().any() or A.isnan().any():
                print(e, 'Loss explodes before relu', A.grad, C.grad)
                sys.exit(1)
            # projected gradient step: ReLU keeps A and C nonnegative
            A = relu(A - self.lr * A.grad)
            C = relu(C - self.lr * C.grad)
            # A = A - self.lr * A.grad
            # C = C - self.lr * C.grad
        A.grad = None
        C.grad = None
        A.requires_grad_()
        C.requires_grad_()
    formatter.printt(
        '{} normalized loss {} in {}\n'.format(
            e, num_loss, round(time.time() - s_time, 2)), self.log_file)
    completed_A = A.detach().numpy()
    C_out = C.detach().numpy()
    C_out = C_out.reshape((len(C_out), 1))
    return completed_A, C_out
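# A self-contained sketch of the update rule above, assuming the intent is
# projected gradient descent with ReLU acting as the projection onto the
# nonnegative orthant. The toy problem and names are illustrative only.
def _demo_projected_gd():
    import torch
    relu = torch.nn.ReLU()
    lr = 0.1
    a = torch.rand(4, requires_grad=True)
    target = torch.tensor([0.5, -0.2, 0.8, 0.1])  # negative entry is unreachable
    for _ in range(200):
        loss = ((a - target) ** 2).sum()
        loss.backward()
        with torch.no_grad():
            a = relu(a - lr * a.grad)  # gradient step, then clamp to a >= 0
        a.requires_grad_()
    # the entry chasing -0.2 is pinned at the boundary 0.0
    assert a.detach().min().item() >= 0.0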
def sort_subset_rank_plus(self, contributor_rankplus, node_ids, num_select):
    subset_coverage_size = {}
    subset_coverage_rank = {}
    all_cands = sorted(contributor_rankplus.keys())
    for comb in itertools.combinations(all_cands, num_select):
        # key is the benefiting peer, value maps contributing peers to occurrences
        peer_contributors = defaultdict(dict)
        coverage_rank = 0
        for con in comb:
            cs = contributor_rankplus[con]
            for plus_cover, occurrence in cs.plus_cover.items():
                for b in plus_cover:
                    peer_contributors[b][con] = occurrence
                # not occurrence alone: that does not encourage broad, high
                # coverage, possibly because coverages are close
                # coverage_rank += occurrence
                coverage_rank += len(plus_cover) * occurrence
        subset_coverage_size[comb] = len(peer_contributors)  # simplest: by coverage
        subset_coverage_rank[comb] = coverage_rank
        formatter.printt(
            '\t\t\tsubset_score {} {} {} {}\n'.format(
                comb, subset_coverage_size[comb], subset_coverage_rank[comb],
                list(peer_contributors.keys())), self.log)
    subset_tuple = []
    for subset, coverage_size in subset_coverage_size.items():
        total_rank = 0
        coverage_rank = subset_coverage_rank[subset]
        for p in subset:
            total_rank += contributor_rankplus[p].rank
        subset_tuple.append((subset, total_rank, coverage_rank, coverage_size))
    # three stable sorts: the last key (coverage_size) dominates, with
    # coverage_rank and total_rank breaking ties in that order
    subset_tuple = sorted(subset_tuple, key=lambda tup: tup[1], reverse=True)
    subset_tuple = sorted(subset_tuple, key=lambda tup: tup[2], reverse=True)
    subset_tuple = sorted(subset_tuple, key=lambda tup: tup[3], reverse=True)
    return subset_tuple
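# Because Python's sort is stable, the three chained sorts above are
# equivalent to one sort on the composite key (coverage_size, coverage_rank,
# total_rank). A minimal sketch with hypothetical tuples:
def _demo_sort_equivalence():
    tuples = [(('a',), 3, 7, 2), (('b',), 9, 7, 2), (('c',), 1, 9, 1)]
    chained = sorted(tuples, key=lambda t: t[1], reverse=True)
    chained = sorted(chained, key=lambda t: t[2], reverse=True)
    chained = sorted(chained, key=lambda t: t[3], reverse=True)
    composite = sorted(tuples, key=lambda t: (t[3], t[2], t[1]), reverse=True)
    assert chained == composite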
def get_subset_ranks_with_plus(self, H, node_ids, plus_mask):
    row_mins = np.argmin(H, axis=1)
    contributor_record = {}  # key is peer, value is its RankPlus record
    for i, r in enumerate(row_mins):
        index = node_ids[r]
        if index not in contributor_record:
            contributor_record[index] = RankPlus(index, 1, 0)
        else:
            contributor_record[index].rank += 1
        contributor_record[index].update_plus(plus_mask[i], node_ids)
    for c, record in contributor_record.items():
        formatter.printt(
            '\t\t\tcontrib:{} plus_cover:{} rank:{}\n'.format(
                c, record.plus_cover, record.rank), self.log)
    return contributor_record
def optim_loop(self, X, easy_estimate_by_row, known_pos_by_row,
               element_peer_scores, tensor_ind_map, T, num_var):
    A = torch.rand(num_var, dtype=torch.float32)
    C = torch.rand(T, requires_grad=True, dtype=torch.float32)
    A.requires_grad_()
    criterion = ElementLoss()
    s_time = time.time()
    optimizer = torch.optim.Adam([A, C], lr=self.lr)
    for e in range(self.epochs):
        optimizer.zero_grad()
        loss = criterion(X, A, C, easy_estimate_by_row, known_pos_by_row,
                         element_peer_scores, tensor_ind_map, T)
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            num_loss = loss.item()
            if e % 100 == 0:
                formatter.printt(
                    '{} normalized loss {} in {}\n'.format(
                        e, num_loss, round(time.time() - s_time, 2)),
                    self.log_file)
                s_time = time.time()
            if C.isnan().any() or A.isnan().any():
                print(e, 'Loss explodes before relu', A.grad, C.grad)
                sys.exit(1)
        if self.stopper.stop_early(num_loss / float(num_var), e):
            break
    formatter.printt(
        '{} normalized loss {} in {}\n'.format(
            e, num_loss, round(time.time() - s_time, 2)), self.log_file)
    completed_A = A.detach().numpy()
    C_out = C.detach().numpy()
    C_out = C_out.reshape((len(C_out), 1))
    return completed_A, C_out
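# self.stopper is defined elsewhere; a plausible minimal sketch of the
# stop_early(normalized_loss, epoch) interface assumed above: stop once the
# loss has not improved by more than a tolerance for `patience` consecutive
# checks. Names and thresholds here are hypothetical, not the actual class.
class _SketchEarlyStopper:
    def __init__(self, patience=5, tol=1e-6):
        self.patience = patience
        self.tol = tol
        self.best = float('inf')
        self.bad_epochs = 0

    def stop_early(self, normalized_loss, epoch):
        if normalized_loss < self.best - self.tol:
            self.best = normalized_loss
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
        return self.bad_epochs >= self.patience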
def get_exploring_peers(self, curr_peers, keep_peers, num_explore, oracle):
    self.dead_loop_breaker += 1
    if self.dead_loop_breaker > 2:
        print('called get_exploring_peers more than 2 times. '
              'Cannot explore new peers even in a whole pool',
              self.dead_loop_breaker)
        # sys.exit(1)
    for p in curr_peers + keep_peers:
        self.hist_explored_peers[p] = self.counter
    pools = self.known_peers
    cands = list(pools.difference(self.hist_explored_peers.keys()))
    if len(cands) >= num_explore:
        explores = []
        np.random.shuffle(cands)
        for i in cands:
            if len(oracle.can_i_connect(self.id, [i])) == 0:
                explores.append(i)
                if num_explore == len(explores):
                    break
        if num_explore == len(explores):
            formatter.printt(
                '\t\tExplore(deplet full):\t\t{}\n'.format(sorted(explores)),
                self.log)
            self.counter += 1
            self.dead_loop_breaker = 0
            return explores
        else:
            # the oracle refused too many candidates: reset the explore
            # history and recurse for the remainder
            num_explore -= len(explores)
            self.hist_explored_peers.clear()
            new_pool_explore = self.get_exploring_peers(
                curr_peers, keep_peers, num_explore, oracle)
            formatter.printt(
                '\t\tExplore(deplet insu oracle):\t\t{}\n'.format(
                    sorted(explores + new_pool_explore)), self.log)
            self.counter += 1
            self.dead_loop_breaker = 0
            return explores + new_pool_explore
    else:
        # not enough unexplored candidates: take whatever is connectable
        explores = []
        for i in cands:
            if len(oracle.can_i_connect(self.id, [i])) == 0:
                explores.append(i)
                if num_explore == len(explores):
                    break
        # count all candidates (including oracle-refused ones) as consumed,
        # so the remaining request shrinks and the recursion terminates
        num_explore -= len(cands)
        self.hist_explored_peers.clear()
        new_pool_explore = self.get_exploring_peers(
            curr_peers, keep_peers, num_explore, oracle)
        formatter.printt(
            '\t\tExplore(deplet insu cand):\t\t{}\n'.format(
                sorted(explores + new_pool_explore)), self.log)
        self.counter += 1
        self.dead_loop_breaker = 0
        return explores + new_pool_explore
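# The recursion above implements explore-with-reset: draw unexplored peers
# first and, once the pool runs out, clear the seen-history and draw again.
# A compact iterative sketch of that policy alone (toy code: no oracle; it
# assumes known_peers is non-empty, and duplicates across resets are
# possible, as in the original).
def _demo_explore_with_reset(known_peers, seen, num_explore, rng):
    picks = []
    while len(picks) < num_explore:
        cands = list(known_peers - seen)
        if not cands:
            seen.clear()  # depleted: every peer becomes explorable again
            continue
        rng.shuffle(cands)
        for c in cands:
            seen.add(c)
            picks.append(c)
            if len(picks) == num_explore:
                break
    return picks
# e.g. _demo_explore_with_reset({1, 2, 3}, set(), 5, random.Random(0))
# returns five peers, reusing the pool after it is exhausted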
def select_best_peer(self, H, nodes, num_select, plus_mask, oracle, curr_out):
    T, N = H.shape
    sorted_rank_plus = self.sort_peers_by_count(H, nodes, plus_mask)
    formatter.printt('\t\tranks by num plus {}\n'.format(sorted_rank_plus),
                     self.log)
    # TODO: two columns can tie exactly; consider adding a small random tiebreaker
    selected = []
    if len(sorted_rank_plus) >= num_select:
        selected = select_applicable_peers(self.id, oracle, sorted_rank_plus,
                                           num_select, curr_out)
        formatter.printt('\t\tExploit( rank ):\t\t{}\n'.format(selected),
                         self.log)
        return selected
    else:
        for i in range(len(sorted_rank_plus)):
            node_id = sorted_rank_plus[i][0]
            if node_id in curr_out:
                selected.append(node_id)
                continue
            if len(oracle.can_i_connect(self.id, [node_id])) == 0:
                selected.append(node_id)
            if len(selected) == num_select:
                break
        formatter.printt('\t\tExploit(plus insuff):\t\t{}\n'.format(selected),
                         self.log)
        return selected
def select_subset_peer(self, H, nodes, num_select, plus_mask, oracle,
                       curr_out):
    T, N = H.shape
    contributor_records = self.get_subset_ranks_with_plus(H, nodes, plus_mask)
    all_cands = sorted(contributor_records.keys())
    selected = []
    if len(all_cands) >= num_select:
        sorted_subset = self.sort_subset_rank_plus(contributor_records, nodes,
                                                   num_select)
        selected = select_applicable_subset(self.id, oracle, sorted_subset,
                                            curr_out, self.log)
        formatter.printt(
            '\t\tranks by num plus cover {}\n'.format(sorted_subset), self.log)
        formatter.printt('\t\tExploit( subset ):\t\t{}\n'.format(selected),
                         self.log)
        return selected
    else:
        for i in all_cands:
            if i in curr_out:
                selected.append(i)
                continue
            if len(oracle.can_i_connect(self.id, [i])) == 0:
                selected.append(i)
        formatter.printt('\t\tExploit(subs insuff):\t\t{}\n'.format(selected),
                         self.log)
        return selected
def draw_random_peers(self, excludes, num, oracle):
    pools = set(range(self.num_node))
    pools = pools.difference(set(excludes))
    pools.remove(self.id)
    conns = []
    if len(pools) >= num:
        # conns = list(np.random.choice(list(pools), num, replace=False))
        pools = list(pools)
        np.random.shuffle(pools)
        conns = self.find_x_rand_to_conn(pools, num, oracle)
        formatter.printt('\t\tExploit(random):\t\t{}\n'.format(conns),
                         self.log)
    else:
        # not enough fresh peers: rebuild the pool allowing reuse, excluding
        # only the peers already drawn in this call
        pools = set(range(self.num_node))
        pools = pools.difference(set(conns))
        pools.remove(self.id)
        pools = list(pools)
        np.random.shuffle(pools)
        conns = self.find_x_rand_to_conn(pools, num, oracle)
        formatter.printt('\t\tExploit(random+reuse):\t\t{}\n'.format(conns),
                         self.log)
    return conns
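# find_x_rand_to_conn is defined elsewhere; a plausible sketch of its
# contract, inferred from the call sites above (hypothetical, not the actual
# helper): walk a pre-shuffled pool and keep the first `x` peers the oracle
# allows us to connect to.
def _sketch_find_x_rand_to_conn(self, pools, x, oracle):
    conns = []
    for p in pools:
        if len(oracle.can_i_connect(self.id, [p])) == 0:
            conns.append(p)
            if len(conns) == x:
                break
    return conns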
def get_best_ranks(self, H, nodes, num_select):
    T, N = H.shape
    ranks = self.get_ranks(H, nodes)
    sorted_ranks = sorted(ranks.items(), key=lambda item: item[1],
                          reverse=True)
    formatter.printt('\tExploit vs. Explore\n', self.log)
    formatter.printt('\t\tranks by num min {}\n'.format(str(sorted_ranks)),
                     self.log)
    selected = []
    if len(sorted_ranks) >= num_select:
        for i in range(num_select):
            node_id = sorted_ranks[i][0]
            selected.append(node_id)
        print('\t\tAdapt:\t\tselected ' + str(selected))
        return selected
    else:
        for i in range(len(sorted_ranks)):
            node_id = sorted_ranks[i][0]
            selected.append(node_id)
        # pools = set([i for i in range(self.num_node)])
        # pools = pools.difference(set(nodes))
        # pools.remove(self.id)
        # if len(pools) >= num_select - len(selected):
        #     conns = list(np.random.choice(list(pools), num_select - len(selected), replace=False))
        #     print('\t\tExploit:Miss\t\t' + str(sorted(selected)) + ' rand ' + str(conns))
        # else:
        #     pools = set([i for i in range(self.num_node)])
        #     pools = pools.difference(set(selected))
        #     pools.remove(self.id)
        #     conns = list(np.random.choice(list(pools), num_select - len(selected), replace=False))
        #     print('\t\tExploit:Miss+random\t\tselected ' + str(sorted(selected)) + ' rand ' + str(sorted(conns)))
        return selected
def run_element_completion(self, X_in, M_in, nM_in, max_time):
    X_in = X_in * M_in
    X_in = X_in / max_time
    T, N = X_in.shape
    unknown_pos_by_row = defaultdict(list)
    known_pos_by_row = defaultdict(list)
    known_plus_pos_by_row = defaultdict(list)
    unknown_pos_by_col = defaultdict(list)
    known_pos_by_col = defaultdict(list)
    known_plus_pos_by_col = defaultdict(list)
    num_known_numeric = 0
    num_known_plus = 0
    for i in range(T):
        for j in range(N):
            if M_in[i, j] == 0 and nM_in[i, j] == 1:
                unknown_pos_by_row[i].append(j)
                unknown_pos_by_col[j].append(i)
            elif M_in[i, j] == 0 and nM_in[i, j] == 0:
                known_plus_pos_by_row[i].append(j)
                known_plus_pos_by_col[j].append(i)
                num_known_plus += 1
            elif M_in[i, j] == 1:
                known_pos_by_row[i].append(j)
                known_pos_by_col[j].append(i)
                num_known_numeric += 1
            else:
                print('Error. Unknown i,j classification')
                sys.exit(1)
    # easily estimable, i.e. does the column have a known value or '+' in other rows
    easy_estimate_by_row = defaultdict(list)
    ambi_estimate_by_row = defaultdict(list)
    un_estimate_by_row = defaultdict(list)
    easy_estimates = []
    ambi_estimates = []
    un_estimates = []
    indicators = {}
    for i in range(T):
        for j in unknown_pos_by_row[i]:
            indicator = Indicator(i, j)
            # some cells are simply impossible to estimate
            if len(known_pos_by_col[j]) > 0 or len(known_plus_pos_by_col[j]) > 0:
                if len(known_pos_by_row[i]) <= 1:
                    # no common element, or at most 1 in common -> impossible
                    indicator.set_unable()
                elif not self.has_common_known_by_row(
                        indicator, known_pos_by_col, known_pos_by_row,
                        known_plus_pos_by_col, 2):
                    indicator.set_unable()
                else:
                    # the rest are estimable
                    if len(known_plus_pos_by_col[j]) == 0:  # other > 0
                        # easy case: all peering rows contain only values, no '+'
                        indicator.set_easy()
                    elif len(known_pos_by_col[j]) - 1 < self.top_n_peer:
                        # selecting top_n_peer must have picked a '+' if there
                        # are not enough numeric peers
                        indicator.set_ambiguous()
                    else:
                        # both numeric and '+' peers present
                        indicator.set_ambiguous()
            else:
                print('Error. all other columns are unknown', i, j)
                sys.exit(1)
            indicators[(i, j)] = indicator
            # classify by how easy it is to estimate
            e_class = indicator.get()
            if e_class == 0:
                easy_estimate_by_row[i].append(j)
                easy_estimates.append((i, j))
            elif e_class == 1:
                ambi_estimate_by_row[i].append(j)
                ambi_estimates.append((i, j))
            elif e_class == 2:
                un_estimate_by_row[i].append(j)
                un_estimates.append((i, j))
            else:
                print('Error. Unknown class', e_class)
                sys.exit(1)
    init_easy_num = len(easy_estimates)
    init_ambi_num = len(ambi_estimates)
    init_unab_num = len(un_estimates)
    # start constructing pytorch tensors
    X = torch.tensor(X_in, dtype=torch.float32)
    M = torch.tensor(M_in, dtype=torch.float32)
    nM = torch.tensor(nM_in, dtype=torch.float32)
    tensor_ind_map = {}
    # compute row-wise scores in tensor form
    element_peer_scores = defaultdict(
        list)  # key is (i, j), value is list of (weight, k-row, mask)
    softmax_func = torch.nn.Softmax(dim=0)
    plus_estimates = []
    plus_estimate_by_row = defaultdict(list)
    for i, j in ambi_estimates:
        ind = indicators[(i, j)]
        num_peer = len(ind.peering_rows)
        scores = np.ones(T) * float("inf")
        for k in sorted(ind.peering_rows):
            common_elements = ind.peering_rows[k]
            sel_diff = X_in[i][common_elements] - X_in[k][common_elements]
            scores[k] = np.var(sel_diff, ddof=1)
        contain_plus = False
        sorted_ind = np.argsort(scores)
        for k in sorted_ind[:self.top_n_peer]:
            if j in known_plus_pos_by_row[k]:
                contain_plus = True
                # print(i, j, 'detect ambiguous in tops ', k, sorted_ind)
                break
        if not contain_plus:
            # extend across ties with the lowest selected score
            low_peer = sorted_ind[self.top_n_peer - 1]
            low_selected_score = scores[low_peer]
            for k in sorted_ind[self.top_n_peer:]:
                if scores[k] == low_selected_score:
                    if j in known_plus_pos_by_row[k]:
                        # print(i, j, 'detect ambiguous by looking extension', k, sorted_ind)
                        contain_plus = True
                        break
                else:
                    break
        if not contain_plus:
            # reset the ambiguous cell (i, j) to easy
            ind.set_easy()
            easy_estimate_by_row[i].append(j)
            easy_estimates.append((i, j))
        else:
            plus_estimate_by_row[i].append(j)
            plus_estimates.append((i, j))
    t = 0
    for i, j in easy_estimates:
        tensor_ind_map[(i, j)] = t
        t += 1
        ind = indicators[(i, j)]
        num_peer = len(ind.peering_rows)
        scores = torch.ones(T) * float("inf")
        for k in sorted(ind.peering_rows):
            common_elements = ind.peering_rows[k]
            sel_diff = X[i][common_elements] - X[k][common_elements]
            scores[k] = torch.var(sel_diff, unbiased=True)
        # tiny random noise breaks exact ties before top-k selection
        randomized_scores = scores + torch.rand(T, dtype=float) * 0.00001
        num_peer = min(self.top_n_peer, num_peer)
        topk, topk_ind = torch.topk(randomized_scores, num_peer, largest=False)
        weight = softmax_func(-1 * topk)
        for c in range(len(topk_ind)):
            # row index in python numbering, not in tensor
            k = topk_ind[c].item()
            select_mask = torch.zeros(N, dtype=int)
            select_mask[j] = 1
            select_mask[ind.peering_rows[k]] = 1
            element_peer_scores[(i, j)].append(
                (weight[c], k, select_mask.gt(0)))
    total_cell_num = (num_known_numeric + num_known_plus + len(easy_estimates)
                      + len(plus_estimates) + len(un_estimates))
    table_text = "\tTable summary:(T,N) T*N {} {} {}\n".format(T, N, T * N)
    table_text += '\t\tknown numeric {}\n'.format(num_known_numeric)
    table_text += '\t\tknown plus {}\n'.format(num_known_plus)
    table_text += '\t\testimating unknown (ambi)+easy {} -> {}\n'.format(
        init_easy_num, len(easy_estimates))
    table_text += '\t\testimating unknown (ambi)+plus {} -> {}\n'.format(
        init_ambi_num, len(plus_estimates))
    table_text += '\t\testimating unknown unab {} -> {}\n'.format(
        init_unab_num, len(un_estimates))
    table_text += '\t\ttotal num cell {}\n\n'.format(total_cell_num)
    formatter.printt(table_text, self.log_file)
    assert (total_cell_num == T * N)
    X = X * nM
    num_var = len(easy_estimates)
    # completed_A, C_out = self.custom_update_loop(X, easy_estimate_by_row, known_pos_by_row, element_peer_scores, tensor_ind_map, T, num_var)
    if num_var != 0:
        completed_A, C_out = self.optim_loop(X, easy_estimate_by_row,
                                             known_pos_by_row,
                                             element_peer_scores,
                                             tensor_ind_map, T, num_var)
    else:
        # easy_estimates is empty here, so completed_A is never read below
        C_out = np.zeros((T, 1))
    C_out = C_out * max_time
    X_out = (X.numpy() * max_time + C_out) * nM_in + (1 - nM_in) * 9999
    unkn_plus_mask = np.zeros((T, N))
    unkn_unab_mask = np.zeros((T, N))
    for i, j in easy_estimates:
        a = completed_A[tensor_ind_map[(i, j)]] * max_time
        X_out[i, j] = a
    for i, j in plus_estimates:
        unkn_plus_mask[i, j] = 1
        X_out[i, j] = 7777
    for i, j in un_estimates:
        X_out[i, j] = 5555
        unkn_unab_mask[i, j] = 1
    return X_out, C_out, unkn_plus_mask, unkn_unab_mask
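# A standalone sketch of the peer-weighting step above: each candidate peer
# row gets a score equal to the unbiased variance of its element-wise
# difference with row i over their common known columns, and the lowest
# scores are softmax-weighted (negated, so lower variance means higher
# weight). Toy data; shapes and values are illustrative only.
def _demo_peer_weights():
    import torch
    x_i = torch.tensor([1.0, 2.0, 3.0, 4.0])
    peers = torch.tensor([[1.1, 2.1, 3.1, 4.1],   # near-constant offset: low variance
                          [0.5, 3.0, 2.0, 6.0]])  # noisy offset: high variance
    scores = torch.stack([torch.var(x_i - p, unbiased=True) for p in peers])
    weights = torch.softmax(-scores, dim=0)
    assert weights[0] > weights[1]  # the consistent peer dominates the estimate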
def get_truth_distance(self, star_i, interested_peers, epoch):
    # construct the graph
    G = nx.Graph()
    for i, node in self.nodes.items():
        if i == star_i:
            # only connect interested edges from the interested node
            for u in interested_peers:
                delay = (self.ld[i][u] + node.node_delay / 2
                         + self.nodes[u].node_delay / 2)
                if i == u:
                    print('self loop', i)
                    sys.exit(1)
                G.add_edge(i, u, weight=delay)
        else:
            # do not connect incoming edges to the interested node
            for u in node.outs:
                if u != star_i:
                    delay = (self.ld[i][u] + node.node_delay / 2
                             + self.nodes[u].node_delay / 2)
                    if i == u:
                        print('self loop', i)
                        sys.exit(1)
                    G.add_edge(i, u, weight=delay)
    dists = {}  # key is the target pub, value is the best peer and length
    formatter.printt('\tEval peers {}\n'.format(interested_peers),
                     self.log_files[star_i])
    pubs = [k for k, v in self.roles.items() if v == 'PUB']
    for m in pubs:
        # the closest distance
        start_t = time.time()
        length, path = nx.single_source_dijkstra(G, source=star_i, target=m,
                                                 weight='weight')
        assert (len(path) >= 1)
        topo_length = None
        line_len = None
        j = None
        if len(path) == 1:
            # the target is the star itself
            assert (star_i == m)
            topo_length = 0
            line_len = 0
            j = star_i
        else:
            j = path[1]
            topo_length = (length - self.proc_delay[j] / 2.0
                           + self.proc_delay[m] / 2.0)
            line_len = self.ld[star_i][m] + self.proc_delay[m]
            # line_len_comp = int(math.ceil(math.sqrt(
            #     (self.loc[star_i][0] - self.loc[m][0])**2 +
            #     (self.loc[star_i][1] - self.loc[m][1])**2))) + self.proc_delay[m]
            # assert(line_len_comp == line_len)
        dists[m] = (j, round(topo_length, 3), round(line_len, 3))
        runtime = round(time.time() - start_t)
        dist_text = ("\t\tpub {m} by peer {j} opt-diff {opt_diff} "
                     "topo_len {topo_len} line_len {line_len} "
                     "in {runtime} sec\n").format(
            m=m, j=j, opt_diff=round(topo_length - line_len, 1),
            topo_len=round(topo_length), line_len=round(line_len),
            runtime=runtime)
        formatter.printt(dist_text, self.log_files[star_i])
    self.dists_hist[star_i].append((epoch, dists))
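# A minimal standalone example of the nx.single_source_dijkstra call used
# above: with a target it returns both the weighted shortest-path length and
# the node path, so path[1] is the first-hop peer whenever source != target.
# Toy graph; node names are illustrative only.
def _demo_dijkstra_first_hop():
    import networkx as nx
    G = nx.Graph()
    G.add_edge('s', 'a', weight=1.0)
    G.add_edge('a', 'm', weight=1.0)
    G.add_edge('s', 'm', weight=5.0)  # direct edge loses to the two-hop route
    length, path = nx.single_source_dijkstra(G, source='s', target='m',
                                             weight='weight')
    assert length == 2.0 and path == ['s', 'a', 'm'] and path[1] == 'a'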