def study_k_effect_facebook():
    graph_file = '/home/pankaj/Sampling/data/input/social_graphs/facebook/facebook_combined.txt'
    N = 4039
    G = read_facebook_graph(graph_file, N)

    influ_obj = Influence(G, 0.3, 200)

    degree_dict = nx.out_degree_centrality(G)
    val = degree_dict.values()
    print [N*x for x in np.sort(val)[-10:]]

    for k in range(10, 200, 10):
        # seed the k highest-degree nodes and evaluate their influence
        a = np.argsort(val)[-k:]
        sample = torch.zeros(N)
        sample[a] = 1
        print k, influ_obj(sample.numpy()).item()
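# `read_facebook_graph` is referenced above but not defined in this file. A
# minimal sketch, assuming the SNAP `facebook_combined.txt` format (one
# whitespace-separated undirected edge per line, node ids 0..N-1) and that the
# caller wants a DiGraph with both arc directions so that
# nx.out_degree_centrality is meaningful; treat this as an assumption, not the
# project's actual loader:
import networkx as nx

def read_facebook_graph_sketch(graph_file, N):
    G = nx.DiGraph()
    G.add_nodes_from(range(N))
    with open(graph_file) as fh:
        for line in fh:
            u, v = map(int, line.split())
            G.add_edge(u, v)
            G.add_edge(v, u)  # undirected edge becomes two arcs
    return G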
def training_mc(x, adj, node_feat, net, lr1, lr2, n_epochs1, n_epochs2, mom, nsamples_mc, file_prefix, p, num_influ_iter):

    f = open(file_prefix + '_training_log.txt', 'w')

    # Phase 1: train the network on the reconstruction loss
    optimizer = optim.Adam(net.parameters(), lr=lr1)

    for epoch in range(n_epochs1):
        optimizer.zero_grad()   # zero the gradient buffers
        y = net(x, adj, node_feat)
        train_loss = reconstruction_loss(x, y)
        print "Epoch: ", epoch, " reconstruction loss = ", train_loss.item()
        f.write(str(0) + " " + str(epoch) + " " + str(train_loss.item()) + "\n")
        train_loss.backward()
        optimizer.step()        # does the update

    # torch.save(net.state_dict(), file_prefix + '_net.dat')
    # net.load_state_dict(torch.load(file_prefix + '_net.dat'))

    # Phase 2: fine-tune on the KL-based loss against the influence oracle
    G = nx.DiGraph(adj[0].numpy())
    influ_obj = Influence(G, p, num_influ_iter)

    optimizer = optim.SGD(net.parameters(), lr=lr2)
    # optimizer = optim.Adam(net.parameters(), lr=lr2)

    for epoch in range(n_epochs2):
        optimizer.zero_grad()   # zero the gradient buffers
        y = net(x, adj, node_feat)
        # train_loss = kl_loss_mc_uniform(x, y, 10, influ_obj)
        train_loss2 = kl_loss_mc_y(x, y, nsamples_mc, influ_obj)
        print "Epoch: ", epoch, " KL-based loss = ", train_loss2.item()
        f.write(str(1) + " " + str(epoch) + " " + str(train_loss2.item()) + "\n")
        train_loss2.backward()
        optimizer.step()        # does the update

    torch.save(net.state_dict(), file_prefix + '_net.dat')
    f.close()

    y = net(x, adj, node_feat)
    return y
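# `Influence(G, p, num_influ_iter)` is the influence oracle used throughout
# this file, but its implementation is not shown here. A minimal sketch of
# what such a callable might look like, assuming the independent cascade model
# with uniform edge activation probability p, Monte Carlo averaging over
# num_influ_iter cascades, and a 0/1 seed vector as input (the caching and the
# itr_total/itr_new/itr_cache counters of the real class are omitted):
import random
import networkx as nx

class InfluenceSketch(object):
    def __init__(self, G, p, num_influ_iter):
        self.G = G                          # directed graph
        self.p = p                          # edge activation probability
        self.num_influ_iter = num_influ_iter

    def __call__(self, seed_vec):
        seeds = [v for v in self.G.nodes() if seed_vec[v] > 0.5]
        total = 0.0
        for _ in range(self.num_influ_iter):
            # simulate one independent cascade from the seed set
            active, frontier = set(seeds), list(seeds)
            while frontier:
                u = frontier.pop()
                for v in self.G.successors(u):
                    if v not in active and random.random() < self.p:
                        active.add(v)
                        frontier.append(v)
            total += len(active)
        return total / self.num_influ_iter  # expected spread estimate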
def variance_study(G, nsamples, k, var_file, p, num_influ_iter, if_herd, x_good_sfo, x_good_fw, x, a):

    N = nx.number_of_nodes(G)
    influ_obj = Influence(G, p, num_influ_iter)

    temp = []
    if a == 0:
        # a == 0: plain Monte Carlo only; record the same value in all three slots
        for t in range(40):
            val = getRelax(G, x, nsamples, influ_obj, if_herd).item()
            temp.append((val, val, val))
    else:
        # importance-sampled estimates around the SFO and FW solutions, plus plain MC
        for t in range(40):
            val1 = getImportanceRelax(G, x_good_sfo, x, nsamples, influ_obj, if_herd, a).item()
            val2 = getImportanceRelax(G, x_good_fw, x, nsamples, influ_obj, if_herd, a).item()
            val3 = getRelax(G, x, nsamples, influ_obj, if_herd).item()
            temp.append((val1, val2, val3))

    # "ground truth" estimate with a larger sample budget
    relax_gt = getRelax(G, x, 200, influ_obj, if_herd).item()

    print('\n' * 2)
    print("sfo std = ", np.std([t[0] for t in temp]), " mean = ", np.mean([t[0] for t in temp]))
    print("fw std = ", np.std([t[1] for t in temp]), " mean = ", np.mean([t[1] for t in temp]))
    print("mc std = ", np.std([t[2] for t in temp]), " mean = ", np.mean([t[2] for t in temp]))
    print("gt = ", relax_gt)

    f = open(var_file, 'a', 0)  # unbuffered append (Python 2)
    f.write(str(np.std([t[0] for t in temp])) + " " + str(np.mean([t[0] for t in temp])) + " " + str(relax_gt) + "\n")
    f.write(str(np.std([t[1] for t in temp])) + " " + str(np.mean([t[1] for t in temp])) + " " + str(relax_gt) + "\n")
    f.write(str(np.std([t[2] for t in temp])) + " " + str(np.mean([t[2] for t in temp])) + " " + str(relax_gt) + "\n")
    f.write('\n')
    f.close()
def multilinear_variance_study():

    file_id = int(sys.argv[1])
    nsamples = int(sys.argv[2])

    nNodes = 512
    bufsize = 0  # unbuffered writes (Python 2)
    p = 0.4
    num_influ_iter = 100

    # dirw: output directory, assumed to be defined at module level
    f = open(dirw + 'multilinear_variance_study_p_' + str(p) + '_N_' + str(nNodes) + '_' + str(file_id) + '_' + str(nsamples) + '.txt', 'w', bufsize)

    ngraphs = 10
    graph_dir = "/home/pankaj/Sampling/data/input/social_graphs/N_" + str(nNodes) + "/"
    file_list = os.listdir(graph_dir)

    graph_file_list = []
    for i in range(ngraphs):
        if 'log' not in file_list[i] and 'gt' not in file_list[i]:
            graph_file_list.append(file_list[i])

    G = read_graph(graph_dir + graph_file_list[file_id], nNodes)
    influ_obj = Influence(G, p, num_influ_iter)

    for t in range(4):
        x = torch.rand(nNodes)
        val = []
        tic = time.clock()
        for k in range(20):
            val.append(getRelax(G, x, nsamples, influ_obj, False).item())
        to_write_list = [file_id, np.var(val), (time.clock() - tic)/20]
        print ' '.join(map(str, to_write_list)) + '\n'
        sys.stdout.flush()
        f.write(' '.join(map(str, to_write_list)) + '\n')

    f.close()
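# `getRelax` is not defined in this file. Its name and the study above suggest
# a Monte Carlo estimate of the multilinear extension F(x) = E_{S~x}[f(S)],
# where node i enters the random set S independently with probability x_i.
# A minimal sketch under that assumption; the if_herd flag, which presumably
# switches to herded/quasi-random sampling, is ignored here:
import torch

def get_relax_sketch(G, x, nsamples, influ_obj, if_herd=False):
    N = x.shape[0]
    est = 0.0
    for _ in range(nsamples):
        sample = (torch.rand(N) < x).float()  # Bernoulli(x) inclusion vector
        est += influ_obj(sample.numpy())
    return torch.tensor(est / nsamples)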
class MyBot:

    def __init__(self, gamestate):
        # define class level variables, will be remembered between turns
        self.gamestate = gamestate
        self.planner_time = gamestate.turntime / 2

    # do_setup is run once at the start of the game,
    # after the bot has received the game settings
    def do_setup(self):
        # initialize data structures after learning the game settings
        self.strat_influence = Influence(self.gamestate, STRAT_DECAY)
        self.planner = Planner(self.gamestate, self.strat_influence)

    def log_turn(self, turn_no):
        if DETAIL_LOG and os.path.isdir('pickle'):
            # dump gamestate
            pickle_file = open('pickle/turn_' + str(self.gamestate.current_turn) + '.gamestate', 'wb')
            pickle.dump(self.gamestate, pickle_file)
            pickle_file.close()
            # dump influence map values
            pickle_file = open('pickle/turn_' + str(self.gamestate.current_turn) + '.influence', 'wb')
            pickle.dump(self.strat_influence, pickle_file)
            pickle_file.close()

    # do_turn is run once per turn
    def do_turn(self):
        logging.debug('turn ' + str(self.gamestate.current_turn))

        # detailed logging
        self.log_turn(self.gamestate.current_turn)

        # handle combat
        self.issue_combat_task()

        plan_start = self.gamestate.time_remaining()

        # decay strategy influence
        logging.debug('strat_influence.decay().start = %s' % str(self.gamestate.time_remaining()))
        self.strat_influence.decay()
        logging.debug('strat_influence.decay().finish = %s' % str(self.gamestate.time_remaining()))

        # use planner to set new influence
        self.planner.do_plan()
        plan_duration = plan_start - self.gamestate.time_remaining()
        self.planner_time = max([plan_duration, self.planner_time])

        # diffuse strategy influence
        logging.debug('strat_influence.diffuse().start = %s' % str(self.gamestate.time_remaining()))
        for i in xrange(3):
            self.strat_influence.diffuse()
            if self.gamestate.time_remaining() < 50:
                logging.debug('stopped diffuse after %d times' % i)
                break
        logging.debug('strat_influence.diffuse().finish = %s' % str(self.gamestate.time_remaining()))

        # handle explorers
        self.issue_explore_task()

        logging.debug('endturn: ant_count = %d, time_elapsed = %s' %
                      (len(self.gamestate.ant_list), self.gamestate.time_elapsed()))

    def issue_combat_task(self):
        'combat logic'
        logging.debug('issue_combat_task.start = %s' % str(self.gamestate.time_remaining()))
        zones = battle.get_combat_zones(self.gamestate)
        logging.debug('zones = %s' % str(zones))

        for zone in zones:
            logging.debug('group combat loop for = %s' % str(zone))
            if len(zone[0]) > 0:
                battle.do_zone_combat(self.gamestate, zone)
            # check if we still have time left to calculate more orders
            if self.gamestate.time_remaining() < self.planner_time + 50:
                break
        logging.debug('issue_combat_task.finish = ' + str(self.gamestate.time_remaining()))

    def issue_explore_task(self):
        'explore the map'
        logging.debug('issue_explore_task.start = %s' % str(self.gamestate.time_remaining()))
        # loop through all my un-moved ants and set them to explore;
        # cur_loc is an ant location tuple in (row, col) form
        for cur_loc in self.gamestate.my_unmoved_ants():
            all_locs = [cur_loc] + [self.gamestate.destination(cur_loc, d)
                                    for d in self.gamestate.passable_directions(cur_loc)]
            loc_influences = [self.strat_influence.map[loc] for loc in all_locs]
            # move toward the adjacent location with the lowest influence
            best_directions = self.gamestate.direction(cur_loc, all_locs[loc_influences.index(min(loc_influences))])
            if len(best_directions) > 0:
                self.gamestate.issue_order((cur_loc, choice(best_directions)))
            # check if we still have time left to calculate more orders
            if self.gamestate.time_remaining() < 10:
                break
        logging.debug('issue_explore_task.finish = ' + str(self.gamestate.time_remaining()))
    # static methods don't receive the instance (self) as their first argument;
    # this is a python decorator
    @staticmethod
    def run():
        "parse input, update the game state and call the bot class's do_turn method"
        gamestate = GameState()
        bot = MyBot(gamestate)
        map_data = ''

        while True:
            try:
                current_line = sys.stdin.readline().rstrip('\r\n')  # strip newline chars
                if current_line.lower() == 'ready':
                    gamestate.setup(map_data)
                    bot.do_setup()
                    gamestate.finish_turn()
                    map_data = ''
                elif current_line.lower() == 'go':
                    gamestate.update(map_data)
                    # call the do_turn method of the class passed in
                    bot.do_turn()
                    gamestate.finish_turn()
                    map_data = ''
                else:
                    map_data += current_line + '\n'
            except EOFError:
                break
            except KeyboardInterrupt:
                raise
            except:
                # don't raise an error or return, so that the bot attempts to stay alive
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
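# The starter-kit convention is to launch the bot from a __main__ guard; the
# exact entry point is not shown in this file, so the following is a minimal
# sketch of how `run()` is presumably invoked:
if __name__ == '__main__':
    try:
        MyBot.run()
    except KeyboardInterrupt:
        print('ctrl-c, leaving ...')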
def main(features_path, dataset_name, loss_type, test_mode=False, force_refresh=False):
    # Check and create the needed directories.
    cache_dir = os.getenv('CACHE_DIR', None)
    if not cache_dir:
        cache_dir = '/tmp/influence-cache'
        os.mkdir(cache_dir)
    dataset_cache_dir = os.path.join(cache_dir, dataset_name)

    results_dir = os.getenv('RESULTS_DIR')
    if not results_dir:
        results_dir = '/tmp/influence-results'
        os.mkdir(results_dir)
    dataset_results_dir = os.path.join(results_dir, dataset_name)

    if not os.path.exists(dataset_cache_dir):
        os.mkdir(dataset_cache_dir)
        os.mkdir(os.path.join(dataset_cache_dir, 'inv_hvp'))

    # Load the jedi dataset.
    dataset = JediDataset(features_path=features_path, name=dataset_name)

    # Add an intercept column to the train and test data.
    dataset.train_X = np.concatenate((np.ones((dataset.train_X.shape[0], 1)), dataset.train_X), axis=1)
    dataset.test_X = np.concatenate((np.ones((dataset.test_X.shape[0], 1)), dataset.test_X), axis=1)

    # number of features/dimensions
    nD = dataset.train_X.shape[1]

    # Fit the logistic regression model; the intercept is already appended to
    # the data, so the model itself must not add one. Load from cache if the
    # model already exists.
    model_path = os.path.join(dataset_cache_dir, 'model_{}_{}.dat'.format(dataset_name, loss_type))
    if not force_refresh and os.path.exists(model_path):
        logger.info('Loading the model from the cache file - {}'.format(model_path))
        model = joblib.load(model_path)
    else:
        logger.info('Generating the model and saving it to cache - {}'.format(model_path))
        model = LR_UnbiasedEstimator(loss_type=loss_type, fit_intercept=False)
        model.fit(dataset.train_X, dataset.train_Y)
        joblib.dump(model, model_path, compress=3)

    # Load the model coefficients.
    W = model.coefficients()
    assert W.shape[0] == nD

    # Populate the test labels using the logistic regression model.
    # The value is continuous in the interval [-1, 1].
    pY = model.predict_prob(dataset.test_X)
    dataset.test_Y = (pY * 2.) - 1.

    tf.reset_default_graph()
    vW = tf.constant(W, name='w', dtype=tf.float32)
    influence = Influence(W=vW, loss_type=loss_type)

    num_train = dataset.train_X.shape[0]

    # Compute class weights (fraction of training points per class).
    class_weights = {}
    unique_classes = np.unique(dataset.train_Y, return_counts=True)
    for idx in range(unique_classes[0].shape[0]):
        c = unique_classes[0][idx]
        v = unique_classes[1][idx] / num_train
        class_weights[c] = v

    # Merge train and test data.
    X = np.vstack([dataset.train_X, dataset.test_X])
    Y = np.hstack([dataset.train_Y, dataset.test_Y])

    # Compute the marginal distance |Wx| / ||W||_2 of every point to the decision boundary.
    marginal_distance = np.abs(np.dot(X, W)) / np.linalg.norm(W, ord=2)
    marginal_distance_path = os.path.join(dataset_cache_dir, 'marginal_distance_{}_{}.dat'.format(dataset_name, loss_type))
    joblib.dump(marginal_distance, marginal_distance_path, compress=3)

    # Load the dataset mapping into a pandas data frame.
    mapping_file = os.path.join(os.getenv('DATA_DIR'), 'animal_breed_sdm/nameMapping_fullInfo_flipped0.2.mat')
    data = sio.loadmat(mapping_file)['nameMapping']
    rows = []
    for d in data:
        rows.append([x[0] for x in d.tolist()])
    df = pd.DataFrame(rows, columns=['img_name', 'common_name', 'dataset', 'train_test', 'class', 'is_flipped'])

    all_names = dataset.train_file_names + dataset.test_file_names

    if test_mode:
        # In test mode, restrict to a small subset of train and test points.
        tr_pos = np.where(dataset.train_Y_ORIG > 0)[0][:20]
        tr_neg = np.where(dataset.train_Y_ORIG < 0)[0][:20]
        te_pos = np.where(dataset.test_Y_ORIG > 0)[0][:5]
        te_neg = np.where(dataset.test_Y_ORIG < 0)[0][:5]
        tr_idx = np.hstack([tr_pos, tr_neg])
        te_idx = np.hstack([te_pos, te_neg])
        tr_names = [dataset.train_file_names[i] for i in tr_idx]
        te_names = [dataset.test_file_names[i] for i in te_idx]
        names = tr_names + te_names
        te_idx = te_idx + num_train  # offset test indices into the merged X
        list_of_idx = np.hstack([tr_idx, te_idx])
        file_names = [x.split('.')[0] for x in names]
        df_names = pd.DataFrame(file_names, columns=['fname'])
        df_filt = pd.merge(left=df, right=df_names, left_on='common_name', right_on='fname', how='inner')
    else:
        list_of_idx = np.arange(X.shape[0])

    inf_pert_loss = np.zeros(shape=[X.shape[0], num_train])
    is_flipped = np.zeros(shape=X.shape[0])

    for idx in list_of_idx:
        # Compute the gradient w.r.t. the given example. Note that the given
        # example can be a test or a train point in our framework.
        x = X[idx, :]
        y = Y[idx]
        vX = tf.Variable(x, 'test_x', dtype=tf.float32)
        vY = tf.Variable(y, 'test_y', dtype=tf.float32)
        test_dl_dw = influence.dl_dw(vX, vY, vW)

        file_name = all_names[idx].split('.')[0]
        _df = df[df.common_name == file_name].values[0]
        if _df[5] == 'flipped':
            is_flipped[idx] = 1

        # Compute the inverse HVP (Hessian-vector product) for the gradient of
        # the given example; load it from the cache if available.
        cache_file = os.path.join(dataset_cache_dir, 'inv_hvp', 'inv_hvp_{}_{}.npz'.format(loss_type, idx))
        if not force_refresh and os.path.exists(cache_file):
            logger.debug('Loading HVP file for idx {} from cache at {}'.format(idx, cache_file))
            inv_hvp = np.load(cache_file)['inv_hvp']
        else:
            start = datetime.datetime.now()
            inv_hvp = influence.inv_hvp_lissa_fast(dataset, test_dl_dw)
            end = datetime.datetime.now()
            exec_time = (end - start).total_seconds()
            logger.debug('Saving HVP file for idx {} to cache at {}'.format(idx, cache_file))
            np.savez_compressed(cache_file, inv_hvp=inv_hvp)

        # Compute the influence of each training example on the given example.
        influence_on_training_points = []
        for train_idx in range(num_train):
            trX, trY = dataset.fetch_train_instance(train_idx)
            vX = tf.Variable(trX, 'X', dtype=tf.float32)
            vY = tf.Variable(trY, 'Y', dtype=tf.float32)
            dl_dydw = influence.dl_dydw(vX, vY, vW).numpy()
            a = -1 * np.tensordot(dl_dydw, inv_hvp, axes=1).flatten()[0]
            _influence = a * class_weights[int(trY)]
            influence_on_training_points.append(_influence)
            inf_pert_loss[idx, train_idx] = _influence

        # Save the per-example results to disk.
        results_file = os.path.join(dataset_cache_dir, 'inf_scores',
                                    'influence_scores_{}_{}_{}_{}.dat'.format(idx, dataset_name, loss_type, is_flipped[idx]))
        joblib.dump(inf_pert_loss[idx, :], results_file, compress=3)
        logger.debug('Saving the influence scores on all the training points for idx {} to {}'.format(idx, results_file))

    # Save the aggregate results to disk.
    results_file = os.path.join(dataset_results_dir, 'influence_scores_{}_{}.dat'.format(dataset_name, loss_type))
    joblib.dump(inf_pert_loss, results_file, compress=3)
    logger.debug('Saving the perturbation loss results to the disk at {}'.format(results_file))

    is_flipped_file = os.path.join(dataset_results_dir, 'example_flipped_{}_{}.dat'.format(dataset_name, loss_type))
    joblib.dump(is_flipped, is_flipped_file, compress=3)
    logger.debug('Saving the flipped data to the disk at {}'.format(is_flipped_file))

    if test_mode:
        img_path = os.path.join(os.getenv('DATA_DIR'), 'animal_breed_sdm/data_dog_flipped0.2/all/')
        rows = 2
        columns = 5

        for i in range(len(list_of_idx)):
            idx = list_of_idx[i]
            inf_scores = inf_pert_loss[idx, :]
            n_images = 10
            bot_10_idx = np.argsort(inf_scores)[:n_images]
            top_10_idx = np.argsort(-inf_scores)[:n_images]
            rnd_10_idx = np.random.randint(0, inf_scores.size, n_images)

            file_name = file_names[i]
            _df = df[df.common_name == file_name].values[0]
            if _df[5] == 'flipped':
                results_pdf_file = os.path.join(dataset_results_dir, 'test_mode_results_{}_{}_flipped.pdf'.format(i, loss_type))
            else:
                results_pdf_file = os.path.join(dataset_results_dir, 'test_mode_results_{}_{}.pdf'.format(i, loss_type))
            pp = PdfPages(results_pdf_file)

            # Test image.
            img = mpimg.imread(os.path.join(img_path, '{}.jpg'.format(file_name)))
            fig = plt.figure()
            plt.imshow(img)
            plt.title('%s/%s/[Flip:%s]' % (_df[3], _df[4], _df[5]))
            pp.savefig(fig)

            # Influence scores of all training points on this example.
            fig = plt.figure()
            plt.plot(np.arange(inf_scores.shape[0]), inf_scores)
            plt.ylim(-2., 2.)
            plt.title('%s/%s/[Flip:%s]' % (_df[3], _df[4], _df[5]))
            pp.savefig(fig)

            # Bottom 10 (most negative influence). The inner loop variable is
            # named j so it does not clobber the outer index i.
            fig = plt.figure(figsize=(20, 10))
            for j in range(n_images):
                img_idx = bot_10_idx[j]
                _file_name = dataset.train_file_names[img_idx].split('.')[0]
                _df = df[df.common_name == _file_name].values[0]
                _inf_score = inf_scores[img_idx]
                img = mpimg.imread(os.path.join(img_path, '{}.jpg'.format(_file_name)))
                fig.add_subplot(rows, columns, j + 1)
                plt.imshow(img)
                plt.title('%s/%0.6f/[Flip:%s]' % (_df[4], _inf_score, _df[5]))
            pp.savefig(fig)

            # Top 10 (most positive influence).
            fig = plt.figure(figsize=(20, 10))
            for j in range(n_images):
                img_idx = top_10_idx[j]
                _file_name = dataset.train_file_names[img_idx].split('.')[0]
                _df = df[df.common_name == _file_name].values[0]
                _inf_score = inf_scores[img_idx]
                img = mpimg.imread(os.path.join(img_path, '{}.jpg'.format(_file_name)))
                fig.add_subplot(rows, columns, j + 1)
                plt.imshow(img)
                plt.title('%s/%0.6f/[Flip:%s]' % (_df[4], _inf_score, _df[5]))
            pp.savefig(fig)

            # Random 10.
            fig = plt.figure(figsize=(20, 10))
            for j in range(n_images):
                img_idx = rnd_10_idx[j]
                _file_name = dataset.train_file_names[img_idx].split('.')[0]
                _df = df[df.common_name == _file_name].values[0]
                _inf_score = inf_scores[img_idx]
                img = mpimg.imread(os.path.join(img_path, '{}.jpg'.format(_file_name)))
                fig.add_subplot(rows, columns, j + 1)
                plt.imshow(img)
                plt.title('%s/%0.6f/[Flip:%s]' % (_df[4], _inf_score, _df[5]))
            pp.savefig(fig)

            print('-----------------------')
            pp.close()
    print('**************************** COMPLETE *****************************************')
model = LR_UnbiasedEstimator(loss_type=loss_type, fit_intercept=False)
model.fit(tr_X, tr_y)
joblib.dump(model, model_path, compress=3)

# Load the model coefficients.
W = model.coefficients()
assert W.shape[0] == nD

# Populate the test labels using the logistic regression model.
# The value is continuous in the interval [-1, 1].
pY = model.predict_prob(te_X)
te_y = (pY * 2.) - 1.

tf.reset_default_graph()
vW = tf.constant(W, name='w', dtype=tf.float32)
influence = Influence(W=vW, loss_type=loss_type)

num_train = tr_X.shape[0]

# Compute class weights (fraction of training points per class).
class_weights = {}
unique_classes = np.unique(tr_y, return_counts=True)
for idx in range(unique_classes[0].shape[0]):
    c = unique_classes[0][idx]
    v = unique_classes[1][idx] / num_train
    class_weights[c] = v

# Merge train and test data.
X = np.vstack([tr_X, te_X])
Y = np.hstack([tr_y, te_y])
def runFrankWolfe(G, nsamples, k, log_file, opt_file, num_fw_iter, p, num_influ_iter, if_herd):

    N = nx.number_of_nodes(G)

    # start from the uniform fractional point (k/N, ..., k/N)
    x = Variable(torch.Tensor([1.0*k/N]*N))

    bufsize = 0  # unbuffered writes (Python 2)
    f = open(log_file, 'w', bufsize)

    influ_obj = Influence(G, p, num_influ_iter)

    tic = time.clock()
    iter_num = 0
    obj = getRelax(G, x, nsamples, influ_obj, if_herd)
    toc = time.clock()

    influ_val = []
    influ_val_best = []
    influ_best = -10

    print "Iteration: ", iter_num, " obj = ", obj.item(), " time = ", (toc - tic), " Total/New/Cache: ", influ_obj.itr_total, influ_obj.itr_new, influ_obj.itr_cache
    f.write(str(toc - tic) + " " + str(obj.item()) + " " + str(influ_obj.itr_total) + '/' + str(influ_obj.itr_new) + '/' + str(influ_obj.itr_cache) + "\n")

    for iter_num in np.arange(1, num_fw_iter):

        influ_obj.counter_reset()

        grad = getGrad(G, x, nsamples, influ_obj, if_herd)
        x_star = getCondGrad(grad, k)   # linear maximization (conditional gradient) step
        step = 2.0/(iter_num + 2)       # standard Frank-Wolfe step size
        x = step*x_star + (1 - step)*x

        obj = getRelax(G, x, nsamples, influ_obj, if_herd)
        toc = time.clock()

        print "Iteration: ", iter_num, " obj = ", obj.item(), " time = ", (toc - tic), " Total/New/Cache: ", influ_obj.itr_total, influ_obj.itr_new, influ_obj.itr_cache
        f.write(str(toc - tic) + " " + str(obj.item()) + " " + str(influ_obj.itr_total) + '/' + str(influ_obj.itr_new) + '/' + str(influ_obj.itr_cache) + "\n")

        if iter_num % 10 == 0:
            # Round the current solution to its top-k indicator and record the function value
            top_k = Variable(torch.zeros(N))
            sorted_ind = torch.sort(x, descending=True)[1][0:k]
            top_k[sorted_ind] = 1
            influ = submodObj(G, top_k, p, 100)
            influ_val.append(influ)
            if influ > influ_best:
                influ_best = influ
            influ_val_best.append(influ_best)

    f.close()

    x_opt = x

    # Round the final solution and get its function value
    top_k = Variable(torch.zeros(N))
    sorted_ind = torch.sort(x_opt, descending=True)[1][0:k]
    top_k[sorted_ind] = 1
    gt_val = submodObj(G, top_k, p, 100)

    # Save the optimum solution and value
    f = open(opt_file, 'w')
    for i in range(len(influ_val)):
        f.write(str(influ_val[i].item()) + ' ' + str(influ_val_best[i].item()) + '\n')
    f.write(str(gt_val.item()) + '\n')
    for x_t in x_opt:
        f.write(str(x_t.item()) + '\n')
    f.close()

    return x
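# `getCondGrad` is the Frank-Wolfe linear maximization oracle. For a
# cardinality constraint |S| <= k, the maximizer of <grad, s> over the
# vertices of the constraint polytope is the indicator vector of the k
# largest gradient entries, which matches the rounding code above. A minimal
# sketch under that assumption:
import torch

def get_cond_grad_sketch(grad, k):
    x_star = torch.zeros_like(grad)
    top_k_ind = torch.sort(grad, descending=True)[1][0:k]
    x_star[top_k_ind] = 1
    return x_star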
def main(features_path, dataset_name, v_type, loss_type=1, debug=False, plot_results=False):

    # Load the jedi dataset.
    dataset = JediDataset(features_path=features_path, name=dataset_name)
    NUM_FEATURES = dataset.train_X.shape[1]
    images_path = '/home/arun/research/projects/crowdsourcing/kdd-2019/data/cats_dogs/all'

    # Build the classifier.
    # TODO: Modify the code to use the LR unbiased estimator.
    model = LR_UnbiasedEstimator(setting=loss_type)
    model.fit(dataset.train_X, dataset.train_Y)
    W = model.coefficients()
    dataset.test_Y = model.predict(dataset.test_X)
    assert W.shape[0] == NUM_FEATURES

    tf.reset_default_graph()
    v_W = tf.constant(W, name='w', dtype=tf.float32)
    if loss_type == 0:
        influence = Influence(W=v_W, loss_type='logistic_loss')
    else:
        influence = Influence(W=v_W, loss_type='surrogate_loss')

    num_train = dataset.train_X.shape[0]
    num_test = dataset.test_X.shape[0]

    # Compute class weights (fraction of training points per class).
    class_weights = {}
    unique_classes = np.unique(dataset.train_Y, return_counts=True)
    for idx in range(unique_classes[0].shape[0]):
        c = unique_classes[0][idx]
        v = unique_classes[1][idx] / num_train
        class_weights[c] = v

    # Compute the influence on the perturbation loss.
    inf_pert_loss = np.zeros(shape=[num_test, num_train])
    if v_type == 'train':
        tgt_indices = np.arange(num_train)
        inf_pert_loss = np.zeros(shape=[num_train, num_train])
    elif v_type == 'test':
        tgt_indices = np.arange(num_test)

    for test_idx in tgt_indices:
        start = datetime.datetime.now()
        inv_hvp = influence.inv_hvp_lissa(dataset, v_idx=test_idx, v_type=v_type)
        end = datetime.datetime.now()
        exec_time = (end - start).total_seconds()
        print('===== Executed in {:0.2f} seconds ====='.format(exec_time))

        influence_on_training_points = []
        # Compute the influence of each training point.
        for train_idx in range(num_train):
            tr_X, tr_Y = dataset.fetch_train_instance(train_idx)

            v_X = tf.Variable(tr_X, 'X', dtype=tf.float32)
            v_Y = tf.Variable(tr_Y, 'Y', dtype=tf.float32)

            dl_dw = influence.dl_dw(v_X, v_Y, v_W)
            dl_dydw = influence.dl_dydw(v_X, v_Y, v_W).numpy()
            a = -1 * np.tensordot(dl_dydw, inv_hvp, axes=1).flatten()[0]

            _influence = a * class_weights[int(tr_Y)]
            influence_on_training_points.append(_influence)
            inf_pert_loss[test_idx, train_idx] = _influence

        # np.savez_compressed('./cache/inf_of_test_{}_{}_{}_{}.npz'.format(v_type, dataset_name, loss_type, test_idx), influence=_influence)

    np.savez_compressed('./cache/{}/inf_pert_loss_{}_{}_{}.npz'.format(dataset_name, v_type, dataset_name, loss_type),
                        inf_pert_loss=inf_pert_loss)

    print('**************************** COMPLETE *****************************************')
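# `inv_hvp_lissa` follows the LiSSA-style inverse Hessian-vector product from
# the influence-functions literature (Koh & Liang): iterate
# h_{t+1} = v + (I - H) h_t, which converges to H^{-1} v when the (damped,
# scaled) Hessian has spectral norm below 1. A minimal numpy sketch, assuming
# a hessian_vector_product(h) callable plus hypothetical damping/scale
# parameters; the class method above may differ in details:
import numpy as np

def inv_hvp_lissa_sketch(v, hessian_vector_product, num_iter=100, damping=0.01, scale=10.0):
    h = v.copy()
    for _ in range(num_iter):
        # recursive update on the damped, scaled Hessian (H + damping*I)/scale
        h = v + h - (hessian_vector_product(h) + damping * h) / scale
    # fixed point is scale * (H + damping*I)^{-1} v, so undo the scaling
    return h / scale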
def fw_reduced_nodes(G, nsamples, k, log_file, opt_file, iterates_file, num_fw_iter, p, num_influ_iter, if_herd, x_good, a):

    N = nx.number_of_nodes(G)
    D = 200  # size of the important-node support the iterates are restricted to

    influ_obj = Influence(G, p, num_influ_iter)

    important_nodes = getImportantNodes(G, D)

    # start from a point supported (almost) only on the important nodes
    x = Variable(torch.Tensor([1e-4]*N))
    x[important_nodes] = 1.0*k/D

    bufsize = 0  # unbuffered writes (Python 2)
    f = open(log_file, 'w', bufsize)
    f2 = open(iterates_file, 'w', bufsize)

    tic = time.clock()
    iter_num = 0
    obj = getImportanceRelax(G, x_good, x, nsamples, influ_obj, if_herd, a)
    toc = time.clock()

    print "Iteration: ", iter_num, " obj = ", obj.item(), " time = ", (toc - tic), " Total/New/Cache: ", influ_obj.itr_total, influ_obj.itr_new, influ_obj.itr_cache
    f.write(str(toc - tic) + " " + str(obj.item()) + " " + str(influ_obj.itr_total) + '/' + str(influ_obj.itr_new) + '/' + str(influ_obj.itr_cache) + "\n")

    for x_t in x:
        f2.write(str(x_t.item()) + '\n')
    f2.write('\n')

    for iter_num in np.arange(1, num_fw_iter):

        influ_obj.counter_reset()

        grad = getReducedPrunedGrad(G, x_good, x, nsamples, influ_obj, if_herd, a, important_nodes)
        x_star = getCondGrad(grad, k)   # linear maximization (conditional gradient) step
        step = 2.0/(iter_num + 2)       # standard Frank-Wolfe step size
        x = step*x_star + (1 - step)*x

        obj = getImportanceRelax(G, x_good, x, nsamples, influ_obj, if_herd, a)
        toc = time.clock()

        print "Iteration: ", iter_num, " obj = ", obj.item(), " time = ", (toc - tic), " Total/New/Cache: ", influ_obj.itr_total, influ_obj.itr_new, influ_obj.itr_cache
        f.write(str(toc - tic) + " " + str(obj.item()) + " " + str(influ_obj.itr_total) + '/' + str(influ_obj.itr_new) + '/' + str(influ_obj.itr_cache) + "\n")

        for x_t in x:
            f2.write(str(x_t.item()) + '\n')
        f2.write('\n')

    f.close()
    f2.close()

    x_opt = x

    # Round the final solution and get its function value
    top_k = Variable(torch.zeros(N))
    sorted_ind = torch.sort(x_opt, descending=True)[1][0:k]
    top_k[sorted_ind] = 1
    gt_val = submodObj(G, top_k, p, 100)

    # Save the optimum solution and value
    f = open(opt_file, 'w')
    f.write(str(gt_val.item()) + '\n')
    for x_t in x_opt:
        f.write(str(x_t.item()) + '\n')
    f.close()

    return x_opt
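# `getImportantNodes` is not defined in this file. Given how it is used above
# (restricting the Frank-Wolfe iterates to a support of size D), a plausible
# minimal sketch is "the D nodes of highest out-degree"; this is a guess, not
# the project's actual selection rule:
import networkx as nx

def get_important_nodes_sketch(G, D):
    deg = G.out_degree()
    return sorted(G.nodes(), key=lambda v: deg[v], reverse=True)[:D]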