def study_k_effect_facebook():
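    """Study how seed-set size k affects influence on the Facebook graph.

    Rank nodes by out-degree centrality, print the ten largest (scaled) values,
    and for k = 10, 20, ..., 190 evaluate the Influence objective (p = 0.3,
    200 iterations) on the indicator vector of the top-k nodes.
    """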

    graph_file = '/home/pankaj/Sampling/data/input/social_graphs/facebook/facebook_combined.txt'

    N = 4039

    G = read_facebook_graph(graph_file, N)

    influ_obj = Influence(G, 0.3, 200)

    degree_dict = nx.out_degree_centrality(G)
 
    val = list(degree_dict.values())

    print([N * x for x in np.sort(val)[-10:]])

    for k in range(10, 200, 10):
    

        a = np.argsort(val)[-k:]

        sample = torch.zeros(N)

        sample[a] = 1

        print("{} {}".format(k, influ_obj(sample.numpy()).item()))
def training_mc(x, adj, node_feat, net, lr1, lr2, n_epochs1, n_epochs2, mom, nsamples_mc, file_prefix, p, num_influ_iter):
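    """Two-phase training of `net`.

    Phase 1 minimizes a reconstruction loss with Adam for n_epochs1 epochs;
    phase 2 fine-tunes with SGD on the Monte Carlo KL-based loss kl_loss_mc_y
    against an Influence objective for n_epochs2 epochs.  Both phases are
    logged to <file_prefix>_training_log.txt, the weights are saved to
    <file_prefix>_net.dat, and the final network output y is returned.
    """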

    f = open(file_prefix + '_training_log.txt', 'w')

    optimizer = optim.Adam(net.parameters(), lr=lr1)

    for epoch in range(n_epochs1):
#    for epoch in range(100):

        #get minibatch
        optimizer.zero_grad()   # zero the gradient buffers
    
        y = net(x, adj, node_feat) 

        train_loss = reconstruction_loss(x, y)

        print("Epoch: {}       reconstruction loss = {}".format(epoch, train_loss.item()))
        f.write(str(0) + " " +  str(epoch) +  " " + str(train_loss.item()) + "\n")

        train_loss.backward()
    
        optimizer.step()    # Does the update

#    torch.save(net.state_dict(), file_prefix + '_net.dat')
#    net.load_state_dict(torch.load(file_prefix + '_net.dat'))

    G = nx.DiGraph(adj[0].numpy())

    influ_obj = Influence(G, p, num_influ_iter)

    optimizer = optim.SGD(net.parameters(), lr=lr2)
#    optimizer = optim.Adam(net.parameters(), lr=lr2)

    for epoch in range(n_epochs2):

        #get minibatch
        optimizer.zero_grad()   # zero the gradient buffers
    
        y = net(x, adj, node_feat) 
 
#        train_loss = kl_loss_mc_uniform(x, y, 10, influ_obj)
        train_loss2 = kl_loss_mc_y(x, y, nsamples_mc, influ_obj)

        print("Epoch: {}       KL-based loss = {}".format(epoch, train_loss2.item()))
        f.write(str(1) + " " +  str(epoch) +  " " + str(train_loss2.item()) + "\n")

        train_loss2.backward()
    
        optimizer.step()    # Does the update
    
    torch.save(net.state_dict(), file_prefix + '_net.dat')
    f.close()
    y = net(x, adj, node_feat) 
    return y
Example #3
def variance_study(G, nsamples, k, var_file, p, num_influ_iter, if_herd,
                   x_good_sfo, x_good_fw, x, a):
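    """Compare the variance of relaxation estimators over 40 repetitions.

    For a == 0 only the plain estimator getRelax is evaluated; otherwise the
    importance-weighted estimators around x_good_sfo and x_good_fw are
    evaluated as well.  Standard deviations and means are printed next to a
    200-sample reference value and appended to var_file.
    """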

    N = nx.number_of_nodes(G)

    influ_obj = Influence(G, p, num_influ_iter)

    temp = []

    if a == 0:
        for t in range(40):
            val = getRelax(G, x, nsamples, influ_obj, if_herd).item()
            temp.append((val, val, val))

    else:
        for t in range(40):
            val1 = getImportanceRelax(G, x_good_sfo, x, nsamples, influ_obj,
                                      if_herd, a).item()
            val2 = getImportanceRelax(G, x_good_fw, x, nsamples, influ_obj,
                                      if_herd, a).item()
            val3 = getRelax(G, x, nsamples, influ_obj, if_herd).item()
            temp.append((val1, val2, val3))

    relax_gt = getRelax(G, x, 200, influ_obj, if_herd).item()

    print('\n' * 2)
    print("sfo std= ", np.std([t[0] for t in temp]), "  mean = ",
          np.mean([t[0] for t in temp]))
    print("fw std = ", np.std([t[1] for t in temp]), "  mean = ",
          np.mean([t[1] for t in temp]))
    print("mc std = ", np.std([t[2] for t in temp]), "  mean = ",
          np.mean([t[2] for t in temp]))
    print("gt = ", relax_gt)

    f = open(var_file, 'a', 0)
    f.write(
        str(np.std([t[0] for t in temp])) + " " +
        str(np.mean([t[0] for t in temp])) + " " + str(relax_gt) + "\n")
    f.write(
        str(np.std([t[1] for t in temp])) + " " +
        str(np.mean([t[1] for t in temp])) + " " + str(relax_gt) + "\n")
    f.write(
        str(np.std([t[2] for t in temp])) + " " +
        str(np.mean([t[2] for t in temp])) + " " + str(relax_gt) + "\n")
    f.write('\n')
    f.close()
def multilinear_variance_study():
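    """Measure variance and runtime of the multilinear relaxation estimator.

    Reads a graph selected by the command-line file_id, draws four random
    fractional points x, evaluates getRelax 20 times for each, and logs the
    variance and average time per call.
    """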

    file_id = int(sys.argv[1])
    nsamples = int(sys.argv[2])
    nNodes = 512 
    bufsize = 0
    p = 0.4 
    num_influ_iter = 100

    f = open(dirw + 'multilinear_variance_study_p_' + str(p) + '_N_' + str(nNodes) + '_' + str(file_id) + '_' + str(nsamples) +'.txt', 'w', bufsize)

    ngraphs = 10 
    graph_dir = "/home/pankaj/Sampling/data/input/social_graphs/N_" + str(nNodes) + "/"

    file_list = os.listdir(graph_dir)
    graph_file_list = []

    for i in range(ngraphs):
        if 'log' not in file_list[i] and 'gt' not in file_list[i]:
            graph_file_list.append(file_list[i])

    G = read_graph(graph_dir + graph_file_list[file_id], nNodes)

    influ_obj = Influence(G, p, num_influ_iter)

    for t in range(4):
        x = torch.rand(nNodes)
        val = []
        tic = time.clock()

        for k in range(20):
            val.append(getRelax(G, x, nsamples, influ_obj, herd = False).item())

        to_write_list = [file_id, np.var(val), (time.clock() - tic)/20]
        print(' '.join(map(str, to_write_list)) + '\n')
        sys.stdout.flush()
        f.write(' '.join(map(str, to_write_list)) + '\n')

    f.close()
Example #5
def do_setup(self):
    # initialize data structures after learning the game settings
    self.strat_influence = Influence(self.gamestate, STRAT_DECAY)
    self.planner = Planner(self.gamestate, self.strat_influence)
Example #6
class MyBot:
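    """Ant-game bot built around a strategy influence map.

    Each turn it issues combat orders first, decays and re-plans the influence
    map, diffuses it a few times within the time budget, and then sends
    un-moved ants toward neighbouring cells of lowest influence.
    """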
    def __init__(self, gamestate):
        # define class level variables, will be remembered between turns
        self.gamestate = gamestate
        self.planner_time = gamestate.turntime / 2
    
    # do_setup is run once at the start of the game
    # after the bot has received the game settings
    def do_setup(self):
        # initialize data structures after learning the game settings
        self.strat_influence = Influence(self.gamestate, STRAT_DECAY)
        self.planner = Planner(self.gamestate, self.strat_influence)
    
    def log_turn(self, turn_no):
        if DETAIL_LOG and os.path.isdir('pickle'):
            # dump gamestate
            pickle_file = open('pickle/turn_' + str(self.gamestate.current_turn) + '.gamestate', 'wb')
            pickle.dump(self.gamestate, pickle_file)
            pickle_file.close()
            
            # dump influence map value
            pickle_file = open('pickle/turn_' + str(self.gamestate.current_turn) + '.influence', 'wb')
            pickle.dump(self.strat_influence, pickle_file)
            pickle_file.close()
    
    # do turn is run once per turn
    def do_turn(self):
        logging.debug('turn ' + str(self.gamestate.current_turn))
        
        # detailed logging
        self.log_turn(self.gamestate.current_turn)
        
        # handle combat
        self.issue_combat_task()
        
        plan_start = self.gamestate.time_remaining()
        # decay strategy influence
        logging.debug('strat_influence.decay().start = %s' % str(self.gamestate.time_remaining())) 
        self.strat_influence.decay()
        logging.debug('strat_influence.decay().finish = %s' % str(self.gamestate.time_remaining())) 
        # use planner to set new influence
        self.planner.do_plan()
        plan_duration = plan_start - self.gamestate.time_remaining()
        self.planner_time = max([plan_duration, self.planner_time])
        
        # diffuse strategy influence
        logging.debug('strat_influence.diffuse().start = %s' % str(self.gamestate.time_remaining())) 
        for i in xrange(3):
            self.strat_influence.diffuse()
            if self.gamestate.time_remaining() < 50:
                logging.debug('stopped diffuse after %d times' % i)
                break
        logging.debug('strat_influence.diffuse().finish = %s' % str(self.gamestate.time_remaining())) 

        # handle explorer
        self.issue_explore_task()
        logging.debug('endturn: ant_count = %d, time_elapsed = %s' % (len(self.gamestate.ant_list), self.gamestate.time_elapsed()))

    def issue_combat_task(self):
        'combat logic'
        logging.debug('issue_combat_task.start = %s' % str(self.gamestate.time_remaining())) 
        zones = battle.get_combat_zones(self.gamestate)
        
        logging.debug('zones = %s' % str(zones))
        for zone in zones:
            logging.debug('group combat loop for = %s' % str(zone))
            if len(zone[0]) > 0:
                battle.do_zone_combat(self.gamestate, zone)
            
            # check if we still have time left to calculate more orders
            if self.gamestate.time_remaining() < self.planner_time + 50:
                break
                
        logging.debug('issue_combat_task.finish = ' + str(self.gamestate.time_remaining())) 
        
    def issue_explore_task(self):
        'explore map'
        logging.debug('issue_explore_task.start = %s' % str(self.gamestate.time_remaining())) 
        # loop through all my un-moved ants and set them to explore
        # cur_loc is an ant location tuple in (row, col) form
        for cur_loc in self.gamestate.my_unmoved_ants():
            all_locs = [cur_loc] + [self.gamestate.destination(cur_loc, d) 
                                    for d in self.gamestate.passable_directions(cur_loc)]
            loc_influences = [self.strat_influence.map[loc] for loc in all_locs]
            best_directions = self.gamestate.direction(cur_loc, all_locs[loc_influences.index(min(loc_influences))])
            if len(best_directions) > 0:
                self.gamestate.issue_order((cur_loc, choice(best_directions)))
            
            # check if we still have time left to calculate more orders
            if self.gamestate.time_remaining() < 10:
                break
        logging.debug('issue_explore_task.finish = ' + str(self.gamestate.time_remaining())) 


    # static methods are not bound to an instance and don't have self passed in
    # this is a python decorator
    @staticmethod
    def run():
        'parse input, update game state and call the bot classes do_turn method'
        gamestate = GameState()
        bot = MyBot(gamestate)
        map_data = ''
        while(True):
            try:
                current_line = sys.stdin.readline().rstrip('\r\n') # strip newline characters
                if current_line.lower() == 'ready':
                    gamestate.setup(map_data)
                    bot.do_setup()
                    gamestate.finish_turn()
                    map_data = ''
                elif current_line.lower() == 'go':
                    gamestate.update(map_data)
                    # call the do_turn method of the class passed in
                    bot.do_turn()
                    gamestate.finish_turn()
                    map_data = ''
                else:
                    map_data += current_line + '\n'
            except EOFError:
                break
            except KeyboardInterrupt:
                raise
            except:
                # don't raise error or return so that bot attempts to stay alive
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
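
# A minimal entry-point sketch (an assumption -- the original module may wire this
# up differently): run the bot's stdin/stdout game loop when executed as a script.
if __name__ == '__main__':
    try:
        MyBot.run()
    except KeyboardInterrupt:
        print('ctrl-c, leaving ...')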
Example #7
def main(features_path,
         dataset_name,
         loss_type,
         test_mode=False,
         force_refresh=False):
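    """Compute influence-function scores for the given dataset.

    Fits (or loads from cache) an LR_UnbiasedEstimator, relabels the test set
    with its soft predictions, and for each selected example computes an
    inverse Hessian-vector product (inv_hvp_lissa_fast) plus the class-weighted
    influence of every training point.  Results are cached and saved to disk;
    in test mode, PDF summaries of the most and least influential training
    images are also rendered.
    """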

    # Create the cache and results directories if they do not already exist.
    cache_dir = os.getenv('CACHE_DIR', None)
    if not cache_dir:
        cache_dir = '/tmp/influence-cache'
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)

    dataset_cache_dir = os.path.join(cache_dir, dataset_name)

    results_dir = os.getenv('RESULTS_DIR')
    if not results_dir:
        results_dir = '/tmp/influence-results'
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)
    dataset_results_dir = os.path.join(results_dir, dataset_name)
    if not os.path.exists(dataset_results_dir):
        os.mkdir(dataset_results_dir)

    if not os.path.exists(dataset_cache_dir):
        os.mkdir(dataset_cache_dir)
        # Sub-directories used later for cached inverse-HVPs and per-example scores.
        os.mkdir(os.path.join(dataset_cache_dir, 'inv_hvp'))
        os.mkdir(os.path.join(dataset_cache_dir, 'inf_scores'))

    # Load the jedi dataset.
    dataset = JediDataset(features_path=features_path, name=dataset_name)

    # Add intercept to train and test data.
    dataset.train_X = np.concatenate((np.ones(
        (dataset.train_X.shape[0], 1)), dataset.train_X),
                                     axis=1)
    dataset.test_X = np.concatenate((np.ones(
        (dataset.test_X.shape[0], 1)), dataset.test_X),
                                    axis=1)

    # num features/dimensions
    nD = dataset.train_X.shape[1]

    # Fit the logistic regression model; the intercept column was already appended above,
    # so fit_intercept=False. Load the model from the cache if it already exists.
    model_path = os.path.join(
        dataset_cache_dir, 'model_{}_{}.dat'.format(dataset_name, loss_type))
    if not force_refresh and os.path.exists(model_path):
        logger.info(
            'Loading the model from the cache file - {}'.format(model_path))
        model = joblib.load(model_path)
    else:
        logger.info('Generating the model and saving it to cache - {}'.format(
            model_path))
        model = LR_UnbiasedEstimator(loss_type=loss_type, fit_intercept=False)
        model.fit(dataset.train_X, dataset.train_Y)
        joblib.dump(model, model_path, compress=3)

    # Load the model coefficients.
    W = model.coefficients()
    assert W.shape[0] == nD

    # populate the test labels using the logistic regression model.
    # The value is continuous in the interval [-1,1]
    pY = model.predict_prob(dataset.test_X)
    dataset.test_Y = (pY * 2.) - 1.

    tf.reset_default_graph()
    vW = tf.constant(W, name='w', dtype=tf.float32)
    influence = Influence(W=vW, loss_type=loss_type)

    num_train = dataset.train_X.shape[0]

    # compute class weights.
    class_weights = {}
    unique_classes = np.unique(dataset.train_Y, return_counts=True)
    for idx in range(unique_classes[0].shape[0]):
        c = unique_classes[0][idx]
        v = unique_classes[1][idx] / num_train
        class_weights[c] = v

    # Merge train and test data.
    X = np.vstack([dataset.train_X, dataset.test_X])
    Y = np.hstack([dataset.train_Y, dataset.test_Y])

    # Compute the marginal distance.
    marginal_distance = np.abs(np.dot(X, W)) / np.linalg.norm(W, ord=2)
    marginal_distance_path = os.path.join(
        dataset_cache_dir,
        'marginal_distance_{}_{}.dat'.format(dataset_name, loss_type))
    joblib.dump(marginal_distance, marginal_distance_path, compress=3)

    # Load data set mapping into a pandas data frame.
    mapping_file = os.path.join(
        os.getenv('DATA_DIR'),
        'animal_breed_sdm/nameMapping_fullInfo_flipped0.2.mat')
    data = sio.loadmat(mapping_file)['nameMapping']

    rows = []
    for d in data:
        rows.append([x[0] for x in d.tolist()])
    df = pd.DataFrame(rows,
                      columns=[
                          'img_name', 'common_name', 'dataset', 'train_test',
                          'class', 'is_flipped'
                      ])
    all_names = dataset.train_file_names + dataset.test_file_names

    if test_mode:
        tr_pos = np.where(dataset.train_Y_ORIG > 0)[0][:20]
        tr_neg = np.where(dataset.train_Y_ORIG < 0)[0][:20]
        te_pos = np.where(dataset.test_Y_ORIG > 0)[0][:5]
        te_neg = np.where(dataset.test_Y_ORIG < 0)[0][:5]

        tr_idx = np.hstack([tr_pos, tr_neg])
        te_idx = np.hstack([te_pos, te_neg])

        tr_names = [dataset.train_file_names[i] for i in tr_idx]
        te_names = [dataset.test_file_names[i] for i in te_idx]

        names = tr_names + te_names

        te_idx = te_idx + num_train

        list_of_idx = np.hstack([tr_idx, te_idx])

        file_names = [x.split('.')[0] for x in names]
        df_names = pd.DataFrame(file_names, columns=['fname'])

        df_filt = pd.merge(left=df,
                           right=df_names,
                           left_on='common_name',
                           right_on='fname',
                           how='inner')

    else:
        list_of_idx = np.arange(X.shape[0])

    inf_pert_loss = np.zeros(shape=[X.shape[0], num_train])
    is_flipped = np.zeros(shape=X.shape[0])

    for idx in list_of_idx:

        # Compute the gradient w.r.t. the given example (which may be a train or a test point).
        x = X[idx, :]
        y = Y[idx]

        vX = tf.Variable(x, 'test_x', dtype=tf.float32)
        vY = tf.Variable(y, 'test_y', dtype=tf.float32)

        test_dl_dw = influence.dl_dw(vX, vY, vW)

        file_name = all_names[idx].split('.')[0]
        _df = df[df.common_name == file_name].values[0]
        if _df[5] == 'flipped':
            is_flipped[idx] = 1

        # Compute the inverse HVP (Hessian-vector product) for this example's gradient,
        # loading it from the cache when available.
        cache_file = os.path.join(
            os.path.join(dataset_cache_dir, 'inv_hvp',
                         'inv_hvp_{}_{}.npz'.format(loss_type, idx)))
        if not force_refresh and os.path.exists(cache_file):
            logger.debug('Loading HVP file for idx {} from cache at {}'.format(
                idx, cache_file))
            inv_hvp = np.load(cache_file)['inv_hvp']
        else:
            start = datetime.datetime.now()
            inv_hvp = influence.inv_hvp_lissa_fast(dataset, test_dl_dw)
            end = datetime.datetime.now()
            exec_time = (end - start).total_seconds()
            logger.debug('Saving HVP file for idx {} to cache at {}'.format(
                idx, cache_file))
            np.savez_compressed(cache_file, inv_hvp=inv_hvp)

        # compute the influence of each training example on the given example
        influence_on_training_points = []
        # compute the influence of each training point.
        for train_idx in range(num_train):
            trX, trY = dataset.fetch_train_instance(train_idx)

            vX = tf.Variable(trX, 'X', dtype=tf.float32)
            vY = tf.Variable(trY, 'Y', dtype=tf.float32)

            dl_dydw = influence.dl_dydw(vX, vY, vW).numpy()
            a = -1 * np.tensordot(dl_dydw, inv_hvp, axes=1).flatten()[0]

            _influence = a * class_weights[int(trY)]

            influence_on_training_points.append(_influence)

            inf_pert_loss[idx, train_idx] = _influence

        # save the results to the disk.
        results_file = os.path.join(
            dataset_cache_dir, 'inf_scores',
            'influence_scores_{}_{}_{}_{}.dat'.format(idx, dataset_name,
                                                      loss_type,
                                                      is_flipped[idx]))
        joblib.dump(inf_pert_loss[idx, :], results_file, compress=3)
        logger.debug(
            'Saving the influence scores on all the training points for idx {} to {}'
            .format(idx, results_file))

    # save the results to the disk.
    results_file = os.path.join(
        dataset_results_dir,
        'influence_scores_{}_{}.dat'.format(dataset_name, loss_type))
    joblib.dump(inf_pert_loss, results_file, compress=3)
    logger.debug(
        'Saving the perturbation loss results to the disk at {}'.format(
            results_file))

    is_flipped_file = os.path.join(
        dataset_results_dir,
        'example_flipped_{}_{}.dat'.format(dataset_name, loss_type))
    joblib.dump(is_flipped, is_flipped_file, compress=3)
    logger.debug(
        'Saving the flipped data to the disk at {}'.format(is_flipped_file))

    if test_mode:
        img_path = os.path.join(os.getenv('DATA_DIR'),
                                'animal_breed_sdm/data_dog_flipped0.2/all/')
        rows = 2
        columns = 5

        for i in range(len(list_of_idx)):

            idx = list_of_idx[i]
            inf_scores = inf_pert_loss[idx, :]

            n_images = 10

            bot_10_idx = np.argsort(inf_scores)[:n_images]
            top_10_idx = np.argsort(-inf_scores)[:n_images]
            rnd_10_idx = np.random.randint(0, inf_scores.size, n_images)

            file_name = file_names[i]
            _df = df[df.common_name == file_name].values[0]

            if _df[5] == 'flipped':
                results_pdf_file = os.path.join(
                    dataset_results_dir,
                    'test_mode_results_{}_{}_flipped.pdf'.format(i, loss_type))
            else:
                results_pdf_file = os.path.join(
                    dataset_results_dir,
                    'test_mode_results_{}_{}.pdf'.format(i, loss_type))

            pp = PdfPages(results_pdf_file)
            img = mpimg.imread(
                os.path.join(img_path, '{}.jpg'.format(file_name)))

            # Test image.
            fig = plt.figure()
            plt.imshow(img)
            plt.title('%s/%s/[Flip:%s]' % (_df[3], _df[4], _df[5]))
            pp.savefig(fig)

            # Influence-score profile over the training set.
            fig = plt.figure()
            plt.plot(np.arange(inf_scores.shape[0]), inf_scores)
            plt.ylim(-2., 2.)
            plt.title('%s/%s/[Flip:%s]' % (_df[3], _df[4], _df[5]))
            pp.savefig(fig)

            # Bottom 10: training images with the most negative influence scores.
            fig = plt.figure(figsize=(20, 10))
            for i in range(10):
                img_idx = bot_10_idx[i]
                _file_name = dataset.train_file_names[img_idx].split('.')[0]
                _df = df[df.common_name == _file_name].values[0]
                _inf_score = inf_scores[img_idx]

                img = mpimg.imread(
                    os.path.join(img_path, '{}.jpg'.format(_file_name)))
                fig.add_subplot(rows, columns, i + 1)
                plt.imshow(img)
                plt.title('%s/%0.6f/[Flip:%s]' % (_df[4], _inf_score, _df[5]))
            pp.savefig(fig)

            # Top 10: training images with the most positive influence scores.
            fig = plt.figure(figsize=(20, 10))
            for i in range(10):
                img_idx = top_10_idx[i]
                _file_name = dataset.train_file_names[img_idx].split('.')[0]
                _df = df[df.common_name == _file_name].values[0]
                _inf_score = inf_scores[img_idx]

                img = mpimg.imread(
                    os.path.join(img_path, '{}.jpg'.format(_file_name)))
                fig.add_subplot(rows, columns, i + 1)
                plt.imshow(img)
                plt.title('%s/%0.6f/[Flip:%s]' % (_df[4], _inf_score, _df[5]))
            pp.savefig(fig)

            # 10 randomly chosen training images for comparison.
            fig = plt.figure(figsize=(20, 10))
            for i in range(10):
                img_idx = rnd_10_idx[i]
                _file_name = dataset.train_file_names[img_idx].split('.')[0]
                _df = df[df.common_name == _file_name].values[0]
                _inf_score = inf_scores[img_idx]

                img = mpimg.imread(
                    os.path.join(img_path, '{}.jpg'.format(_file_name)))
                fig.add_subplot(rows, columns, i + 1)
                plt.imshow(img)
                plt.title('%s/%0.6f/[Flip:%s]' % (_df[4], _inf_score, _df[5]))
            pp.savefig(fig)

            print('-----------------------')

            pp.close()

    # for test_idx in tgt_indices:
    #     start = datetime.datetime.now()
    #     inv_hvp = influence.inv_hvp_lissa(dataset, v_idx=test_idx, v_type=v_type)
    #     end = datetime.datetime.now()
    #     exec_time = (end-start).total_seconds()
    #     print('===== Executed in  {:0.2f} seconds ====='.format(exec_time))
    #
    #     influence_on_training_points = []
    #     # compute the influence of each training points.
    #     for train_idx in range(num_train):
    #         tr_X, tr_Y = dataset.fetch_train_instance(train_idx)
    #
    #         v_X = tf.Variable(tr_X, 'X', dtype=tf.float32)
    #         v_Y = tf.Variable(tr_Y, 'Y', dtype=tf.float32)
    #
    #         dl_dw = influence.dl_dw(v_X, v_Y, v_W)
    #         dl_dydw = influence.dl_dydw(v_X, v_Y, v_W).numpy()
    #         a = -1* np.tensordot(dl_dydw, inv_hvp, axes=1).flatten()[0]
    #
    #
    #         _influence = a*class_weights[int(tr_Y)]
    #
    #         influence_on_training_points.append(_influence)
    #
    #         inf_pert_loss[test_idx, train_idx] = _influence
    #
    #     # np.savez_compressed('./cache/inf_of_test_{}_{}_{}_{}.npz'.format(v_type, dataset_name, loss_type, test_idx), influence = _influence)
    #
    # np.savez_compressed('./cache/{}/inf_pert_loss_{}_{}_{}.npz'.format(dataset_name, v_type, dataset_name, loss_type),
    #                     inf_pert_loss=inf_pert_loss)

    print(
        '**************************** COMPLETE *****************************************'
    )
Example #8
model = LR_UnbiasedEstimator(loss_type=loss_type, fit_intercept=False)
model.fit(tr_X, tr_y)
joblib.dump(model, model_path, compress=3)

# Load the model coefficients.
W = model.coefficients()
assert W.shape[0] == nD

# populate the test labels using the logistic regression model.
# The value is continuous in the interval [-1,1]
pY = model.predict_prob(te_X)
te_y = (pY * 2.) - 1.

tf.reset_default_graph()
vW = tf.constant(W, name='w', dtype=tf.float32)
influence = Influence(W=vW, loss_type=loss_type)

num_train = tr_X.shape[0]

# compute class weights.
class_weights = {}
unique_classes = np.unique(tr_y, return_counts=True)
for idx in range(unique_classes[0].shape[0]):
    c = unique_classes[0][idx]
    v = unique_classes[1][idx] / num_train
    class_weights[c] = v

# Merge train and test data.
X = np.vstack([tr_X, te_X])
Y = np.hstack([tr_y, te_y])
Example #9
def runFrankWolfe(G, nsamples, k, log_file, opt_file, num_fw_iter, p, num_influ_iter, if_herd):
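    """Frank-Wolfe on the multilinear relaxation of the influence objective.

    Starts from the uniform fractional point k/N and, for num_fw_iter
    iterations, moves toward the conditional-gradient vertex getCondGrad(grad, k)
    with step size 2/(t + 2).  Progress is logged; every 10 iterations the
    current point is rounded to its top-k set and its influence recorded.
    The recorded influence values, the rounded objective value, and the final
    fractional solution are written to opt_file, and the fractional solution
    is returned.
    """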

    N = nx.number_of_nodes(G)

    x = Variable(torch.Tensor([1.0*k/N]*N))
    
    bufsize = 0

    f = open(log_file, 'w', bufsize)

    influ_obj = Influence(G, p, num_influ_iter)

    tic = time.clock()

    iter_num = 0
    obj = getRelax(G, x, nsamples, influ_obj, if_herd)
    toc = time.clock()

    influ_val = []
    influ_val_best = []
    influ_best = -10

    print("Iteration: {}    obj = {}  time = {}   Total/New/Cache: {}/{}/{}".format(
        iter_num, obj.item(), toc - tic,
        influ_obj.itr_total, influ_obj.itr_new, influ_obj.itr_cache))

    f.write(str(toc - tic) + " " + str(obj.item()) + " " + str(influ_obj.itr_total) + '/' + str(influ_obj.itr_new) + '/' + str(influ_obj.itr_cache) + "\n") 

    for iter_num in np.arange(1, num_fw_iter):

        influ_obj.counter_reset()

        grad = getGrad(G, x, nsamples, influ_obj, if_herd)

        x_star = getCondGrad(grad, k)

        step = 2.0/(iter_num + 2) 

        x = step*x_star + (1 - step)*x

        obj = getRelax(G, x, nsamples, influ_obj, if_herd)
        
        toc = time.clock()

        print("Iteration: {}    obj = {}  time = {}   Total/New/Cache: {}/{}/{}".format(
            iter_num, obj.item(), toc - tic,
            influ_obj.itr_total, influ_obj.itr_new, influ_obj.itr_cache))

        f.write(str(toc - tic) + " " + str(obj.item()) + " " + str(influ_obj.itr_total) + '/' + str(influ_obj.itr_new) + '/' + str(influ_obj.itr_cache) + "\n") 

        if iter_num % 10 == 0:

            #Round the current solution and get function values
            top_k = Variable(torch.zeros(N))  # indicator vector for the rounded top-k set
            sorted_ind = torch.sort(x, descending = True)[1][0:k]
            top_k[sorted_ind] = 1
            influ = submodObj(G, top_k, p, 100)
            influ_val.append(influ)
            if influ > influ_best:
                influ_best = influ
            influ_val_best.append(influ_best)

    f.close()

    x_opt = x

    #Round the optimum solution and get function values
    top_k = Variable(torch.zeros(N))  # indicator vector for the rounded top-k set
    sorted_ind = torch.sort(x_opt, descending = True)[1][0:k]
    top_k[sorted_ind] = 1
    gt_val = submodObj(G, top_k, p, 100)

    #Save optimum solution and value
    f = open(opt_file, 'w')

    for i in range(len(influ_val)):
        f.write(str(influ_val[i].item()) + ' ' + str(influ_val_best[i].item()) + '\n')

    f.write(str(gt_val.item()) + '\n')
    for x_t in x_opt:
        f.write(str(x_t.item()) + '\n')
    f.close()

    return x
Example #10
def main(features_path, dataset_name, v_type, loss_type=1, debug=False, plot_results=False):
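    """Compute influence-on-perturbation-loss scores for the jedi dataset.

    Fits an LR_UnbiasedEstimator, builds an Influence object for the chosen
    loss, then for each target index (training or test points, depending on
    v_type) computes an inverse-HVP with LiSSA and the class-weighted influence
    of every training point; the resulting matrix is saved with
    np.savez_compressed.
    """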

    # Load the jedi dataset.
    dataset = JediDataset(features_path=features_path, name=dataset_name)
    NUM_FEATURES = dataset.train_X.shape[1]
    
    images_path = '/home/arun/research/projects/crowdsourcing/kdd-2019/data/cats_dogs/all'

    # Build the classifier.
    # TODO: Modify the code to use the LR unbiased estimator.

    model = LR_UnbiasedEstimator(setting=loss_type)
    model.fit(dataset.train_X, dataset.train_Y)
    W = model.coefficients()

    dataset.test_Y = model.predict(dataset.test_X)

    assert W.shape[0] == NUM_FEATURES

    tf.reset_default_graph()

    v_W = tf.constant(W, name='w', dtype=tf.float32)

    if loss_type == 0:
        influence = Influence(W=v_W, loss_type='logistic_loss')
    else: 
        influence = Influence(W=v_W, loss_type='surrogate_loss')

    num_train = dataset.train_X.shape[0]
    num_test = dataset.test_X.shape[0]


    # compute class weights.
    class_weights = {}
    unique_classes = np.unique(dataset.train_Y, return_counts=True)
    for idx in range(unique_classes[0].shape[0]):
        c = unique_classes[0][idx]
        v = unique_classes[1][idx]/num_train
        class_weights[c] = v


    # Compute the influence on perturbation loss 
    inf_pert_loss = np.zeros(shape=[num_test, num_train])

    if v_type == 'train':
        tgt_indices = np.arange(num_train)
        inf_pert_loss = np.zeros(shape=[num_train, num_train])
    elif v_type == 'test':
        tgt_indices = np.arange(num_test)


    for test_idx in tgt_indices:
        start = datetime.datetime.now()
        inv_hvp = influence.inv_hvp_lissa(dataset, v_idx=test_idx, v_type=v_type)
        end = datetime.datetime.now()
        exec_time = (end-start).total_seconds()
        print('===== Executed in  {:0.2f} seconds ====='.format(exec_time))

        influence_on_training_points = []
        # compute the influence of each training point.
        for train_idx in range(num_train):
            tr_X, tr_Y = dataset.fetch_train_instance(train_idx)

            v_X = tf.Variable(tr_X, 'X', dtype=tf.float32)
            v_Y = tf.Variable(tr_Y, 'Y', dtype=tf.float32)

            dl_dw = influence.dl_dw(v_X, v_Y, v_W)
            dl_dydw = influence.dl_dydw(v_X, v_Y, v_W).numpy()
            a = -1* np.tensordot(dl_dydw, inv_hvp, axes=1).flatten()[0]


            _influence = a*class_weights[int(tr_Y)]

            influence_on_training_points.append(_influence)

            inf_pert_loss[test_idx, train_idx] = _influence

        # np.savez_compressed('./cache/inf_of_test_{}_{}_{}_{}.npz'.format(v_type, dataset_name, loss_type, test_idx), influence = _influence)

    np.savez_compressed('./cache/{}/inf_pert_loss_{}_{}_{}.npz'.format(dataset_name, v_type, dataset_name, loss_type),
                        inf_pert_loss=inf_pert_loss)

    print('**************************** COMPLETE *****************************************')
Example #11
def fw_reduced_nodes(G, nsamples, k, log_file, opt_file, iterates_file, num_fw_iter, p, num_influ_iter, if_herd, x_good, a):
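    """Frank-Wolfe restricted to a reduced set of important nodes.

    Concentrates the initial fractional mass on the D = 200 nodes returned by
    getImportantNodes and optimizes the importance-weighted relaxation
    getImportanceRelax around the reference point x_good, logging objective
    values and every iterate.  The final fractional solution is rounded to a
    top-k set, its influence is saved to opt_file, and x_opt is returned.
    """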

    N = nx.number_of_nodes(G)

    D = 200

    influ_obj = Influence(G, p, num_influ_iter)

    important_nodes = getImportantNodes(G, D)

    x = Variable(torch.Tensor([1e-4]*N))

    x[important_nodes] = 1.0*k/D

    bufsize = 0

    f = open(log_file, 'w', bufsize)
    f2 = open(iterates_file, 'w', bufsize)

    tic = time.clock()

    iter_num = 0
    obj = getImportanceRelax(G, x_good, x, nsamples, influ_obj, if_herd, a)
    toc = time.clock()

    print("Iteration: {}    obj = {}  time = {}   Total/New/Cache: {}/{}/{}".format(
        iter_num, obj.item(), toc - tic,
        influ_obj.itr_total, influ_obj.itr_new, influ_obj.itr_cache))

    f.write(str(toc - tic) + " " + str(obj.item()) + " " + str(influ_obj.itr_total) + '/' + str(influ_obj.itr_new) + '/' + str(influ_obj.itr_cache) + "\n") 

    for x_t in x:
        f2.write(str(x_t.item()) + '\n')
    f2.write('\n')

    for iter_num in np.arange(1, num_fw_iter):

        influ_obj.counter_reset()

        grad = getReducedPrunedGrad(G, x_good, x,nsamples, influ_obj, if_herd, a, important_nodes)

        x_star = getCondGrad(grad, k)

        step = 2.0/(iter_num + 2) 

        x = step*x_star + (1 - step)*x

        obj = getImportanceRelax(G, x_good, x, nsamples, influ_obj, if_herd, a)
        
        toc = time.clock()

        print("Iteration: {}    obj = {}  time = {}   Total/New/Cache: {}/{}/{}".format(
            iter_num, obj.item(), toc - tic,
            influ_obj.itr_total, influ_obj.itr_new, influ_obj.itr_cache))

        f.write(str(toc - tic) + " " + str(obj.item()) + " " + str(influ_obj.itr_total) + '/' + str(influ_obj.itr_new) + '/' + str(influ_obj.itr_cache) + "\n") 


        for x_t in x:
            f2.write(str(x_t.item()) + '\n')
        f2.write('\n')

    f.close()
    f2.close()

    x_opt = x

    #Round the optimum solution and get function values
    top_k = Variable(torch.zeros(N))  # indicator vector for the rounded top-k set
    sorted_ind = torch.sort(x_opt, descending = True)[1][0:k]
    top_k[sorted_ind] = 1
    gt_val = submodObj(G, top_k, p, 100)

    #Save optimum solution and value
    f = open(opt_file, 'w')

    f.write(str(gt_val.item()) + '\n')

    for x_t in x_opt:
        f.write(str(x_t.item()) + '\n')
    f.close()

    return x_opt