def _read_checkin_tolerant(path):
    '''Read a checkin CSV, skipping malformed rows instead of failing.

    ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0 in
    favour of ``on_bad_lines``. The new keyword is tried first and the old one
    is used as a fallback so the loader works on either side of that change.
    '''
    try:
        # 'warn' mirrors the old error_bad_lines=False default: skip and warn.
        return pd.read_csv(path, on_bad_lines='warn')
    except TypeError:
        # Older pandas rejects the unknown on_bad_lines keyword.
        return pd.read_csv(path, error_bad_lines=False)


def para_replace(city, defense_name, checkin, ratio, step):
    ''' para replace

    Runs replace_core once per user in parallel, then reloads, cleans and
    rewrites the defended checkin file.

    Args:
        city: city
        defense_name: 20_replace_20
        checkin: checkin data
        ratio: obfuscate proportion, 0.1, 0.2, 0.3 ...
        step: step length, (odd)
    Returns:
        checkin: checkin data, re-read from
            dataset/<city>/defense/<city>_<defense_name>.checkin with
            incomplete rows dropped and uid/locid converted to int
    '''
    out_path = ('dataset/' + city + '/defense/' +
                city + '_' + defense_name + '.checkin')

    # Initialize the output file with just the column names as its first row.
    pd.DataFrame([checkin.columns]).to_csv(out_path, index=False, header=None)

    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')
    core_num = mp.cpu_count()
    print(core_num)

    # One job per user. NOTE(review): the workers presumably append their rows
    # to out_path, since the file is re-read right after -- confirm in
    # replace_core.
    Parallel(n_jobs=core_num)(
        delayed(replace_core)(
            city, defense_name, checkin.loc[checkin.uid == u],
            ul_graph, lu_graph, ratio, step)
        for u in checkin.uid.unique())

    # Reload what was written, tolerating malformed rows, and drop any row
    # with missing values.
    checkin = _read_checkin_tolerant(out_path)
    checkin = checkin.dropna().reset_index(drop=True)

    # Ids may come back as floats or float-like strings; go through float
    # before int so values such as "12.0" convert cleanly.
    checkin.uid = checkin.uid.apply(float).apply(int)
    checkin.locid = checkin.locid.apply(float).apply(int)

    # Persist the cleaned table over the raw output.
    checkin.to_csv(out_path, index=False)
    return checkin
def single_run(city, cicnt=20, wl=100, wt=20, n_feature=128, new_run=False):
    '''Run the undefended pipeline for one city.

    Steps: set up folders, load checkins and friends, build the user/location
    graphs, optionally generate random walks, train embeddings, construct
    features and run the unsupervised friend prediction.

    NOTE(review): another ``single_run`` with the signature
    ``(city, cicnt, ratio)`` appears in this source; if both live in the same
    module the later definition shadows this one -- confirm.

    Args:
        city: dataset name (e.g. Brightkite, Gowalla, ny, la, london)
        cicnt: value forwarded to data_process and used as the model-name
            prefix
        wl: forwarded to emb_train / feature_construct /
            unsuper_friends_predict
        wt: forwarded to the same three steps
        n_feature: forwarded to the same three steps
        new_run: when True, (re)generate the random walks before training
    '''
    folder_setup(city)
    checkin, friends = data_process(city, cicnt)
    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    model_name = '%s_locid' % (cicnt,)
    print(model_name)

    # maximal 100 walk_len, 20 walk_times
    walk_len = 100
    walk_times = 20

    print('walking')
    if new_run:
        user_ids = checkin.uid.unique()
        para_ul_random_batch(city, model_name, user_ids,
                             ul_graph, lu_graph, walk_len, walk_times)
    print('walk done')

    print('emb training')
    emb_train(city, model_name, wl, wt, n_feature)
    print('emb training done')

    feature_construct(city, model_name, friends, wl, wt, n_feature)
    unsuper_friends_predict(city, model_name, wl, wt, n_feature)
def single_run(city, cicnt, ratio):
    '''Run the pipeline for one city with the hiding defense applied.

    NOTE(review): another ``single_run`` with the signature
    ``(city, cicnt=20, wl=100, ...)`` appears in this source; if both live in
    the same module only the later definition is reachable -- confirm.

    Args:
        city: dataset name
        cicnt: value forwarded to data_process and used as the name prefix
        ratio: hiding proportion given as a percentage (10 20 30 40);
            it is converted to a fraction before being passed to para_hiding
    '''
    percent = ratio
    ratio = percent * 1.0 / 100

    # Build the label from the caller's percentage, not from ratio * 100:
    # the round trip through a float fraction can truncate to the wrong
    # integer (e.g. 29 -> 0.29 * 100 = 28.999... -> 28).
    ratio_label = str(int(percent))

    folder_setup(city)
    checkin, friends = data_process(city, cicnt)

    defense_name = str(cicnt) + '_hiding_' + ratio_label
    print(defense_name)
    checkin = para_hiding(city, defense_name, checkin, ratio)

    # Graphs are built from the defended checkins, not the raw ones.
    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    model_name = str(cicnt) + '_locid_hiding_' + ratio_label
    print(model_name)

    walk_len, walk_times = 100, 20  # maximal 100 walk_len, 20 walk_times

    print('walking')
    para_ul_random_walk(city, model_name, checkin.uid.unique(),
                        ul_graph, lu_graph, walk_len, walk_times)
    print('walk done')

    print('emb training')
    emb_train(city, model_name)
    print('emb training done')

    feature_construct(city, model_name, friends)
    unsuper_friends_predict(city, model_name)
def single_ex_run(city, cicnt):
    '''Run the pipeline for one city with the extreme_balance defense applied.

    Args:
        city: dataset name
        cicnt: value forwarded to data_process and used as the name prefix
    '''
    prefix = str(cicnt)

    folder_setup(city)
    checkin, friends = data_process(city, cicnt)

    defense_name = prefix + '_ex'
    print(defense_name)
    checkin = extreme_balance(city, defense_name, checkin)

    # Graphs are built from the defended checkins.
    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    model_name = prefix + '_locid_ex'
    print(model_name)

    # maximal 100 walk_len, 20 walk_times
    walk_len = 100
    walk_times = 20

    print('walking')
    user_ids = checkin.uid.unique()
    para_ul_random_batch(city, model_name, user_ids,
                         ul_graph, lu_graph, walk_len, walk_times)
    print('walk done')

    print('emb training')
    emb_train(city, model_name)
    print('emb training done')

    feature_construct(city, model_name, friends)
    unsuper_friends_predict(city, model_name)
def batch_random_walk(num_user, num_location):
    '''Generate random walks for a ``workload_<num_user>_<num_location>`` dataset.

    Only the walking stage is run; no embedding training or prediction follows.

    Args:
        num_user: first component of the workload dataset name
        num_location: second component of the workload dataset name
    '''
    city = '_'.join(['workload', str(num_user), str(num_location)])
    folder_setup(city)

    cicnt = 20
    # The friends table returned alongside the checkins is not needed here.
    checkin, _ = data_process(city, cicnt)
    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    model_name = str(cicnt) + '_locid'
    print(model_name)

    # maximal 100 walk_len, 20 walk_times
    walk_len = 50
    walk_times = 10

    print('walking')
    user_ids = checkin.uid.unique()
    para_ul_random_batch(city, model_name, user_ids,
                         ul_graph, lu_graph, walk_len, walk_times)
    print('walk done')
def single_replace(city, cicnt, ratio, step, fail_to_continue=False):
    '''Run the pipeline for one city with the replace defense applied.

    Args:
        city: dataset name
        cicnt: value forwarded to data_process and used as the name prefix
        ratio: replacement proportion given as a percentage; it is converted
            to a fraction before being passed to para_replace
        step: step length forwarded to para_replace
        fail_to_continue: when True, resume an earlier run: the defense and
            the random walks are skipped and the previously written defended
            checkin file is loaded instead
    '''
    percent = ratio
    ratio = percent * 1.0 / 100

    # Build the label from the caller's percentage, not from ratio * 100:
    # the round trip through a float fraction can truncate to the wrong
    # integer (e.g. 29 -> 0.29 * 100 = 28.999... -> 28).
    suffix = str(int(percent)) + '_' + str(int(step))

    folder_setup(city)
    checkin, friends = data_process(city, cicnt)

    defense_name = str(cicnt) + '_replace_' + suffix
    model_name = str(cicnt) + '_locid_replace_' + suffix

    if not fail_to_continue:
        checkin = para_replace(city, defense_name, checkin, ratio, step)
    else:
        # Load the file para_replace wrote on the earlier run. The path is
        # built from defense_name so it also resolves when cicnt != 20.
        checkin = pd.read_csv('dataset/' + city + '/defense/' + city + '_' +
                              defense_name + '.checkin')

    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    walk_len, walk_times = 100, 20  # maximal 100 walk_len, 20 walk_times

    print('walking')
    if not fail_to_continue:
        para_ul_random_batch(city, model_name, checkin.uid.unique(),
                             ul_graph, lu_graph, walk_len, walk_times)
    print('walk done')

    print('emb training')
    emb_train(city, model_name)
    print('emb training done')

    feature_construct(city, model_name, friends)
    unsuper_friends_predict(city, model_name)
# Command-line driver: <city> <cicnt> <ratio percent> <step>.
# Applies the replace defense, generates random walks on the defended
# checkins and trains the embeddings.
city = sys.argv[1]
cicnt = int(sys.argv[2])
ratio_pct = int(sys.argv[3])  # 10 20 30 40
ratio = ratio_pct * 1.0 / 100
step = int(sys.argv[4])

folder_setup(city)
checkin, friends = data_process(city, cicnt)

# The label uses the integer percentage from the command line; recomputing it
# as int(ratio * 100) can truncate to the wrong value (e.g. 29 -> 28).
defense_name = str(cicnt) + '_replace_' + str(ratio_pct) + '_' + str(int(step))
checkin = para_replace(city, defense_name, checkin, ratio, step)

# Graphs are built from the defended checkins.
ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

model_name = (str(cicnt) + '_locid_replace_' + str(ratio_pct) + '_' +
              str(int(step)))

walk_len, walk_times = 100, 20  # maximal 100 walk_len, 20 walk_times

# print() with a single argument behaves the same on Python 2 and Python 3.
print('walking')
para_ul_random_walk(city, model_name, checkin.uid.unique(),
                    ul_graph, lu_graph, walk_len, walk_times)
print('walk done')

print('emb training')
emb_train(city, model_name)
print('emb training done')