def load_data(args):
    """Load the cleaned dataset and build persona-graph training inputs.

    Two persona-graph variants are supported, selected via ``args.input_type``:
      1. "persona_ori": the original persona graph
      2. "persona_POI": persona graph with separated POIs

    Returns a tuple of (offsets, checkins, count_nodes, friendships, maps,
    train_user_checkins, persona_checkins, center_ori_maps).
    """
    mat = loadmat('dataset/cleaned_{}.mat'.format(args.dataset_name))
    friendship_old_ori = mat_to_numpy_array(mat['friendship_old'])
    friendship_new = mat_to_numpy_array(mat["friendship_new"])

    edgelist_path = 'Suhi_output/edgelist_{}_{}'.format(
        args.dataset_name, args.POI_level)
    persona_to_ori_path = 'Suhi_output/ego_net_{}_{}'.format(
        args.dataset_name, args.POI_level)
    edgelistPOI_path = 'Suhi_output/edgelistPOI_{}_{}'.format(
        args.dataset_name, args.POI_level)
    location_map_path = 'Suhi_output/location_dict_{}'.format(
        args.dataset_name)

    # BUG FIX: this whole branch was commented out, which left
    # persona_checkins, maps_PtOri, friendship_old_persona, maps_OritP and
    # center_ori_maps undefined and made the function raise NameError at the
    # renumber_checkins() call below. Restored from the commented code.
    if args.input_type == "persona_ori":
        friendship_old_persona, maps_PtOri, maps_OritP, center_ori_maps = load_ego(
            edgelist_path,
            persona_to_ori_path,
            friendship_old_ori=friendship_old_ori)
        persona_checkins = create_persona_checkins(
            mat['selected_checkins'], maps_OritP)
    elif args.input_type == "persona_POI":
        friendship_old_persona, maps_PtOri, persona_POI, POI_maps, maps_OritP, center_ori_maps = load_ego(
            edgelist_path, persona_to_ori_path, edgelistPOI_path,
            location_map_path, friendship_old_ori)
        persona_checkins = create_personaPOI_checkins(
            mat['selected_checkins'], maps_OritP, persona_POI, POI_maps,
            center_ori_maps)
    else:
        raise ValueError("Unknown input_type: {}".format(args.input_type))

    persona_checkins, offset1, offset2, offset3, n_nodes_total, n_users = renumber_checkins(
        persona_checkins, maps_PtOri)

    ############## Train Test split for POI prediction ##################
    # n_train == n_data, so the validation slice is deliberately empty here.
    n_data = persona_checkins.shape[0]
    n_train = n_data
    sorted_checkins = persona_checkins[np.argsort(persona_checkins[:, 1])]
    train_checkins = sorted_checkins[:n_train]
    val_checkins = sorted_checkins[n_train:]
    #####################################################################

    print("Build user checkins dictionary...")
    train_user_checkins = {}
    user_location = dict()
    for user_id in range(1, n_users + 1):
        inds_checkins = np.argwhere(train_checkins[:, 0] == user_id).flatten()
        checkins = train_checkins[inds_checkins]
        train_user_checkins[user_id] = checkins
        user_location[user_id] = set(np.unique(checkins[:, 2]).tolist())
        if args.test:
            break  # quick-test mode: only the first user's entry is built

    offsets = [offset1, offset2, offset3]
    checkins = [
        train_checkins, val_checkins, train_user_checkins, user_location
    ]
    count_nodes = [n_users, n_nodes_total]
    friendships = [friendship_old_ori, friendship_old_persona, friendship_new]
    maps = [maps_PtOri, maps_OritP]
    return offsets, checkins, count_nodes, friendships, maps, train_user_checkins, persona_checkins, center_ori_maps
def load_data(args):
    """Load the cleaned check-in dataset and prepare POI-prediction splits.

    Returns (offsets, checkins, count_nodes, friendships, selected_checkins),
    where ``checkins`` bundles the train/val splits and per-user lookups.
    """
    mat = loadmat('dataset/cleaned_{}.mat'.format(args.dataset_name))
    selected_checkins = mat['selected_checkins']
    friendship_old = mat["friendship_old"]  # edge index from 0
    friendship_new = mat["friendship_new"]

    (selected_checkins, offset1, offset2, offset3,
     n_nodes_total, n_users) = renumber_checkins(selected_checkins)

    ############## Train Test split for POI prediction ##################
    # Order chronologically (column 1); the whole set goes to training,
    # so the validation slice is deliberately empty.
    order = np.argsort(selected_checkins[:, 1])
    sorted_checkins = selected_checkins[order]
    n_train = sorted_checkins.shape[0]
    train_checkins = sorted_checkins[:n_train]
    val_checkins = sorted_checkins[n_train:]
    #####################################################################

    print("Build user checkins dictionary...")
    train_user_checkins = {}
    user_location = {}
    for user_id in range(1, n_users + 1):
        mask = np.argwhere(train_checkins[:, 0] == user_id).flatten()
        rows = train_checkins[mask]
        train_user_checkins[user_id] = rows
        user_location[user_id] = set(np.unique(rows[:, 2]).tolist())

    # everything here is from 1
    return ([offset1, offset2, offset3],
            [train_checkins, val_checkins, train_user_checkins, user_location],
            [n_users, n_nodes_total],
            [friendship_old, friendship_new],
            selected_checkins)
    # --- tail of a function whose `def` lies outside this chunk ---
    # Reports the observed node-id range, then returns the friendship edges
    # sorted by their first endpoint together with the check-in matrix.
    print("Min: {}, Max: {}, Len: {}".format(np.min(nodes), np.max(nodes), len(nodes)))
    friendship_old = friendship_old[np.argsort(friendship_old[:, 0])]
    return friendship_old, selected_checkins


if __name__ == "__main__":
    args = parse_args2()
    print(args)
    model = args.model
    # Pretrained node embeddings; on-disk format depends on the model name.
    embs = read_emb(args.emb_path, args.model)
    if args.POI:
        friendship, selected_checkins = read_input(args.dataset_name)
        friendship = friendship.astype(int)
    if model.lower() != "dhne":
        # DHNE keeps its own indexing; all other models get ids renumbered.
        selected_checkins, o1, o2, o3, nt, nu = renumber_checkins(
            selected_checkins)
    if args.POI:
        # Chronological 80/20 split on the time column (column 1).
        n_trains = int(0.8 * len(selected_checkins))
        sorted_time = np.argsort(selected_checkins[:, 1])
        train_indices = sorted_time[:n_trains]
        test_indices = sorted_time[n_trains:]
        train_checkins = selected_checkins[train_indices]
        test_checkins = selected_checkins[test_indices]
        print(test_checkins)
        max_test_checkins = np.max(test_checkins)
        if max_test_checkins > embs.shape[0]:
            # Test split references ids beyond the embedding table; build a
            # padding block so lookups do not go out of range.
            print("Max test checkins: {}, emb shape: {}".format(
                max_test_checkins, embs.shape))
            # NOTE(review): the pad reuses the FIRST rows of `embs` and its
            # length is max_test_checkins - embs.shape[0] — this presumes ids
            # are contiguous from 1; confirm against renumber_checkins.
            to_add = embs[0:max_test_checkins - embs.shape[0]].reshape(
                -1, embs.shape[1])
            # (chunk ends here; `to_add` is presumably concatenated below)
def load_data(args):
    """Load cleaned data and build persona-graph train/validation splits.

    Two persona-graph variants are supported, selected via ``args.input_type``:
      1. "persona_ori": the original persona graph
      2. "persona_POI": persona graph with separated POIs

    Returns (offsets, checkins, count_nodes, friendships, maps,
    train_user_checkins, persona_checkins, center_ori_maps).
    """
    mat = loadmat('dataset/cleaned_{}.mat'.format(args.dataset_name))
    friendship_old_ori = mat_to_numpy_array(mat['friendship_old'])
    friendship_new = mat_to_numpy_array(mat["friendship_new"])

    edgelist_path = 'Suhi_output/edgelist_{}_{}'.format(
        args.dataset_name, args.POI_level)
    persona_to_ori_path = 'Suhi_output/ego_net_{}_{}'.format(
        args.dataset_name, args.POI_level)
    edgelistPOI_path = 'Suhi_output/edgelistPOI_{}_{}'.format(
        args.dataset_name, args.POI_level)
    location_map_path = 'Suhi_output/location_dict_{}'.format(
        args.dataset_name)

    # 80/20 chronological split decided on the ORIGINAL check-ins; the
    # create_*_checkins helpers translate these indices to persona indices.
    before_selected_checkins = mat['selected_checkins']
    n_train = int(len(before_selected_checkins) * 0.8)
    sorted_time = np.argsort(before_selected_checkins[:, 1])
    train_indices = sorted_time[:n_train]

    if args.input_type == "persona_ori":
        friendship_old_persona, maps_PtOri, maps_OritP, center_ori_maps = load_ego(
            edgelist_path,
            persona_to_ori_path,
            friendship_old_ori=friendship_old_ori)
        persona_checkins, new_train_indices, new_test_indices = create_persona_checkins(
            mat['selected_checkins'], maps_OritP, train_indices)
    elif args.input_type == "persona_POI":
        # NOTE(review): unlike the persona_ori branch, friendship_old_ori is
        # not forwarded here — confirm load_ego's signature for this mode.
        friendship_old_persona, maps_PtOri, persona_POI, POI_maps, maps_OritP, center_ori_maps = load_ego(
            edgelist_path, persona_to_ori_path, edgelistPOI_path,
            location_map_path)
        persona_checkins, new_train_indices, new_test_indices = create_personaPOI_checkins(
            mat['selected_checkins'], maps_OritP, persona_POI, POI_maps,
            center_ori_maps, train_indices)
    else:
        raise ValueError("Unknown input_type: {}".format(args.input_type))

    persona_checkins, offset1, offset2, offset3, n_nodes_total, n_users = renumber_checkins(
        persona_checkins, maps_PtOri)
    train_checkins = persona_checkins[new_train_indices]
    val_checkins = persona_checkins[new_test_indices]

    # Map validation check-ins back to original users and drop duplicates:
    # several personas of one user can yield the same (user, time, location).
    new_val_checkins = []
    seen_keys = dict()
    for i in range(len(val_checkins)):
        checkin_i = val_checkins[i]
        ori_user = maps_PtOri[checkin_i[0]]
        key = "{}_{}_{}".format(ori_user, checkin_i[1], checkin_i[2])
        # BUG FIX: the dedup key was recorded but every row was still
        # appended unconditionally, so the "before"/"after" counts could
        # never differ. Keep only the first occurrence of each key.
        if key not in seen_keys:
            seen_keys[key] = 0
            checkin_i[0] = ori_user
            new_val_checkins.append(checkin_i.tolist())
    print("Num val checkins before: {}".format(len(val_checkins)))
    val_checkins = np.array(new_val_checkins)
    print("Num val checkins after: {}".format(len(val_checkins)))

    ###############################################
    # (Removed a dead re-computation of the Suhi_output paths here: the
    # variables were rebuilt but never used past this point.)
    ######################

    print("Build user checkins dictionary...")
    train_user_checkins = {}
    user_location = dict()
    for user_id in range(1, n_users + 1):
        inds_checkins = np.argwhere(train_checkins[:, 0] == user_id).flatten()
        checkins = train_checkins[inds_checkins]
        train_user_checkins[user_id] = checkins
        user_location[user_id] = set(np.unique(checkins[:, 2]).tolist())

    offsets = [offset1, offset2, offset3]
    checkins = [
        train_checkins, val_checkins, train_user_checkins, user_location
    ]
    count_nodes = [n_users, n_nodes_total]
    friendships = [friendship_old_ori, friendship_old_persona, friendship_new]
    maps = [maps_PtOri, maps_OritP]
    return offsets, checkins, count_nodes, friendships, maps, train_user_checkins, persona_checkins, center_ori_maps
        # --- tail of a function whose `def` lies outside this chunk ---
        new_time += 1
    # Append the synthesized check-ins (if any) to the original matrix.
    additional_checkins = np.array(additional_checkins)
    print("Num add: {}".format(len(additional_checkins)))
    if len(additional_checkins) > 0:
        selected_checkins = np.concatenate((selected_checkins, additional_checkins), axis=0)
    return selected_checkins


if __name__ == "__main__":
    args = parse_args()
    print(args)
    model = args.model
    friendship, selected_checkins = read_input(args.dataset_name)
    friendship = friendship.astype(int)
    # NOTE(review): `or 1` forces this branch to ALWAYS run, so "dhne" is
    # renumbered too — looks like a debugging leftover; confirm intent.
    if model.lower() != "dhne" or 1:
        selected_checkins, o1, o2, o3, nt, nu = renumber_checkins(selected_checkins)  # from 1 to the end
    if args.POI:
        # Chronological 80/20 split on the time column (column 1).
        n_trains = int(0.8 * len(selected_checkins))
        sorted_time = np.argsort(selected_checkins[:, 1])
        train_indices = sorted_time[:n_trains]
        test_indices = sorted_time[n_trains:]
        test_checkins = selected_checkins[test_indices]
        train_checkins = selected_checkins[train_indices]
        """
        OK! Now we've got all the material we need
        """
    else:
        # No POI evaluation: train on every check-in.
        train_checkins = selected_checkins
    if model.lower() == "deepwalk":
        save_deepwalk(friendship, train_checkins, args.dataset_name)