Ejemplo n.º 1
0
def load_data(args):
    """Load the cleaned dataset and build the persona-graph check-in structures.

    There are two types of persona graph, selected with ``args.input_type``:

    1. ``persona_ori``: the original persona graph.
    2. ``persona_POI``: the persona graph with separated POIs.

    Args:
        args: parsed options. This function reads ``dataset_name``,
            ``POI_level``, ``input_type`` and ``test``.

    Returns:
        A tuple ``(offsets, checkins, count_nodes, friendships, maps,
        train_user_checkins, persona_checkins, center_ori_maps)`` where

        * ``offsets`` is ``[offset1, offset2, offset3]`` as returned by
          ``renumber_checkins``;
        * ``checkins`` is ``[train_checkins, val_checkins,
          train_user_checkins, user_location]``;
        * ``count_nodes`` is ``[n_users, n_nodes_total]``;
        * ``friendships`` is ``[friendship_old_ori, friendship_old_persona,
          friendship_new]``;
        * ``maps`` is ``[maps_PtOri, maps_OritP]``.

    Raises:
        ValueError: if ``args.input_type`` is neither ``"persona_ori"`` nor
            ``"persona_POI"``.
    """
    mat = loadmat('dataset/cleaned_{}.mat'.format(args.dataset_name))
    friendship_old_ori = mat_to_numpy_array(mat['friendship_old'])
    friendship_new = mat_to_numpy_array(mat["friendship_new"])

    edgelist_path = 'Suhi_output/edgelist_{}_{}'.format(
        args.dataset_name, args.POI_level)
    persona_to_ori_path = 'Suhi_output/ego_net_{}_{}'.format(
        args.dataset_name, args.POI_level)

    # The persona graph and its check-ins must be loaded before renumbering;
    # without this step `persona_checkins` and the maps are unbound below.
    # NOTE(review): confirm these call signatures against the current
    # definitions of load_ego / create_persona_checkins /
    # create_personaPOI_checkins.
    if args.input_type == "persona_ori":
        friendship_old_persona, maps_PtOri, maps_OritP, center_ori_maps = load_ego(
            edgelist_path,
            persona_to_ori_path,
            friendship_old_ori=friendship_old_ori)
        persona_checkins = create_persona_checkins(mat['selected_checkins'],
                                                   maps_OritP)
    elif args.input_type == "persona_POI":
        edgelistPOI_path = 'Suhi_output/edgelistPOI_{}_{}'.format(
            args.dataset_name, args.POI_level)
        location_map_path = 'Suhi_output/location_dict_{}'.format(
            args.dataset_name)
        (friendship_old_persona, maps_PtOri, persona_POI, POI_maps,
         maps_OritP, center_ori_maps) = load_ego(
             edgelist_path, persona_to_ori_path, edgelistPOI_path,
             location_map_path, friendship_old_ori)
        persona_checkins = create_personaPOI_checkins(
            mat['selected_checkins'], maps_OritP, persona_POI, POI_maps,
            center_ori_maps)
    else:
        raise ValueError(
            "Unsupported input_type: {!r} (expected 'persona_ori' or "
            "'persona_POI')".format(args.input_type))

    persona_checkins, offset1, offset2, offset3, n_nodes_total, n_users = renumber_checkins(
        persona_checkins, maps_PtOri)

    ############## Train Test split for POI prediction ##################
    # n_train equals the number of rows, so every check-in lands in the
    # training part and the validation slice is empty.
    n_data = persona_checkins.shape[0]
    n_train = n_data

    sorted_checkins = persona_checkins[np.argsort(persona_checkins[:, 1])]
    train_checkins = sorted_checkins[:n_train]
    val_checkins = sorted_checkins[n_train:]
    #####################################################################

    print("Build user checkins dictionary...")
    train_user_checkins = {}
    user_location = dict()
    user_column = train_checkins[:, 0]
    for user_id in range(1, n_users + 1):
        checkins_of_user = train_checkins[user_column == user_id]
        train_user_checkins[user_id] = checkins_of_user
        # Distinct values of column 2 for this user.
        user_location[user_id] = set(
            np.unique(checkins_of_user[:, 2]).tolist())
        if args.test:
            # Test mode: only the first user is processed.
            break

    offsets = [offset1, offset2, offset3]
    checkins = [
        train_checkins, val_checkins, train_user_checkins, user_location
    ]
    count_nodes = [n_users, n_nodes_total]
    friendships = [friendship_old_ori, friendship_old_persona, friendship_new]
    maps = [maps_PtOri, maps_OritP]

    return offsets, checkins, count_nodes, friendships, maps, train_user_checkins, persona_checkins, center_ori_maps
Ejemplo n.º 2
0
def load_data(args):
    """Load the cleaned dataset and group its check-ins by user.

    Reads ``dataset/cleaned_<dataset_name>.mat``, renumbers the check-ins with
    ``renumber_checkins``, orders them by column 1 and builds per-user lookup
    tables for user ids ``1..n_users``.

    Args:
        args: parsed options; only ``dataset_name`` is read here.

    Returns:
        A tuple ``(offsets, checkins, count_nodes, friendships,
        selected_checkins)`` where ``offsets`` holds the three offsets from
        ``renumber_checkins``, ``checkins`` is ``[train_checkins,
        val_checkins, train_user_checkins, user_location]``, ``count_nodes``
        is ``[n_users, n_nodes_total]`` and ``friendships`` is
        ``[friendship_old, friendship_new]`` taken straight from the file.
    """
    mat = loadmat('dataset/cleaned_{}.mat'.format(args.dataset_name))
    raw_checkins = mat['selected_checkins']
    old_edges = mat["friendship_old"]  # edge index from 0
    new_edges = mat["friendship_new"]

    (selected_checkins, first_offset, second_offset, third_offset,
     n_nodes_total, n_users) = renumber_checkins(raw_checkins)

    # Train/test split for POI prediction: the cut point is the full row
    # count, so all rows are training rows and the validation slice is empty.
    split_at = selected_checkins.shape[0]
    order = np.argsort(selected_checkins[:, 1])
    by_time = selected_checkins[order]
    train_checkins, val_checkins = by_time[:split_at], by_time[split_at:]

    print("Build user checkins dictionary...")
    owner = train_checkins[:, 0]
    # User ids are numbered from 1.
    train_user_checkins = {
        uid: train_checkins[owner == uid]
        for uid in range(1, n_users + 1)
    }
    # Distinct values of column 2 per user.
    user_location = {
        uid: set(np.unique(rows[:, 2]).tolist())
        for uid, rows in train_user_checkins.items()
    }

    return (
        [first_offset, second_offset, third_offset],
        [train_checkins, val_checkins, train_user_checkins, user_location],
        [n_users, n_nodes_total],
        [old_edges, new_edges],
        selected_checkins,
    )
Ejemplo n.º 3
0
    print("Min: {}, Max: {}, Len: {}".format(np.min(nodes), np.max(nodes),
                                             len(nodes)))
    friendship_old = friendship_old[np.argsort(friendship_old[:, 0])]
    return friendship_old, selected_checkins


# Script entry point: read the embeddings and, when args.POI is set, build a
# train/test split of the check-ins ordered by column 1.
if __name__ == "__main__":
    args = parse_args2()
    print(args)
    model = args.model
    embs = read_emb(args.emb_path, args.model)
    if args.POI:
        friendship, selected_checkins = read_input(args.dataset_name)
        friendship = friendship.astype(int)
        # Every model except "dhne" (case-insensitive) is renumbered and split.
        if model.lower() != "dhne":
            selected_checkins, o1, o2, o3, nt, nu = renumber_checkins(
                selected_checkins)
            # NOTE(review): this check is always true here because it is
            # nested inside the outer `if args.POI:`.
            if args.POI:
                # First 80% of rows (ascending column 1) are the training
                # part; the remaining rows are the test part.
                n_trains = int(0.8 * len(selected_checkins))
                sorted_time = np.argsort(selected_checkins[:, 1])
                train_indices = sorted_time[:n_trains]
                test_indices = sorted_time[n_trains:]
                train_checkins = selected_checkins[train_indices]
                test_checkins = selected_checkins[test_indices]
                print(test_checkins)

            # np.max runs over the whole test array, i.e. over every column.
            max_test_checkins = np.max(test_checkins)
            # The largest test value exceeds the number of embedding rows.
            if max_test_checkins > embs.shape[0]:
                print("Max test checkins: {}, emb shape: {}".format(
                    max_test_checkins, embs.shape))
                # Take the leading (max - row count) embedding rows as a 2-D
                # array; presumably used to pad embs — the use of to_add is
                # not visible in this excerpt.
                to_add = embs[0:max_test_checkins - embs.shape[0]].reshape(
                    -1, embs.shape[1])
Ejemplo n.º 4
0
def load_data(args):
    """Load the cleaned dataset and build persona check-ins with an 80/20 split.

    There are two types of persona graph, selected with ``args.input_type``:

    1. ``persona_ori``: the original persona graph.
    2. ``persona_POI``: the persona graph with separated POIs.

    The raw check-ins are ordered by column 1 and the positions of the first
    80% are handed to the persona check-in builders, which return the persona
    check-ins together with the train/test row indices into them. Validation
    rows are mapped back to original users via ``maps_PtOri`` and
    de-duplicated on ``(original user, column 1, column 2)``.

    Args:
        args: parsed options. This function reads ``dataset_name``,
            ``POI_level`` and ``input_type``.

    Returns:
        A tuple ``(offsets, checkins, count_nodes, friendships, maps,
        train_user_checkins, persona_checkins, center_ori_maps)`` where

        * ``offsets`` is ``[offset1, offset2, offset3]`` as returned by
          ``renumber_checkins``;
        * ``checkins`` is ``[train_checkins, val_checkins,
          train_user_checkins, user_location]``;
        * ``count_nodes`` is ``[n_users, n_nodes_total]``;
        * ``friendships`` is ``[friendship_old_ori, friendship_old_persona,
          friendship_new]``;
        * ``maps`` is ``[maps_PtOri, maps_OritP]``.

    Raises:
        ValueError: if ``args.input_type`` is neither ``"persona_ori"`` nor
            ``"persona_POI"``.
    """
    mat = loadmat('dataset/cleaned_{}.mat'.format(args.dataset_name))
    friendship_old_ori = mat_to_numpy_array(mat['friendship_old'])
    friendship_new = mat_to_numpy_array(mat["friendship_new"])

    edgelist_path = 'Suhi_output/edgelist_{}_{}'.format(
        args.dataset_name, args.POI_level)
    persona_to_ori_path = 'Suhi_output/ego_net_{}_{}'.format(
        args.dataset_name, args.POI_level)

    # Positions of the first 80% of the raw check-ins when ordered by
    # column 1; the array itself is left in its original order.
    raw_checkins = mat['selected_checkins']
    n_train = int(len(raw_checkins) * 0.8)
    train_indices = np.argsort(raw_checkins[:, 1])[:n_train]

    if args.input_type == "persona_ori":
        friendship_old_persona, maps_PtOri, maps_OritP, center_ori_maps = load_ego(
            edgelist_path,
            persona_to_ori_path,
            friendship_old_ori=friendship_old_ori)
        persona_checkins, new_train_indices, new_test_indices = create_persona_checkins(
            raw_checkins, maps_OritP, train_indices)
    elif args.input_type == "persona_POI":
        edgelistPOI_path = 'Suhi_output/edgelistPOI_{}_{}'.format(
            args.dataset_name, args.POI_level)
        # NOTE(review): confirm whether the location dict file name should
        # also carry the POI level, like the other Suhi_output paths.
        location_map_path = 'Suhi_output/location_dict_{}'.format(
            args.dataset_name)
        (friendship_old_persona, maps_PtOri, persona_POI, POI_maps,
         maps_OritP, center_ori_maps) = load_ego(
             edgelist_path, persona_to_ori_path, edgelistPOI_path,
             location_map_path)
        persona_checkins, new_train_indices, new_test_indices = create_personaPOI_checkins(
            raw_checkins, maps_OritP, persona_POI, POI_maps,
            center_ori_maps, train_indices)
    else:
        # Fail early instead of hitting an unbound local further down.
        raise ValueError(
            "Unsupported input_type: {!r} (expected 'persona_ori' or "
            "'persona_POI')".format(args.input_type))

    persona_checkins, offset1, offset2, offset3, n_nodes_total, n_users = renumber_checkins(
        persona_checkins, maps_PtOri)

    train_checkins = persona_checkins[new_train_indices]
    val_checkins = persona_checkins[new_test_indices]

    # Map validation rows back to original users and keep only the first
    # occurrence of each (original user, column 1, column 2) combination.
    seen_keys = set()
    deduped_val_checkins = []
    for checkin in val_checkins:
        ori_user = maps_PtOri[checkin[0]]
        checkin_time = checkin[1]
        location = checkin[2]
        key = "{}_{}_{}".format(ori_user, checkin_time, location)
        if key in seen_keys:
            continue
        seen_keys.add(key)
        checkin[0] = ori_user
        deduped_val_checkins.append(checkin.tolist())

    print("Num val checkins before: {}".format(len(val_checkins)))
    val_checkins = np.array(deduped_val_checkins)
    print("Num val checkins after: {}".format(len(val_checkins)))

    print("Build user checkins dictionary...")
    train_user_checkins = {}
    user_location = dict()
    user_column = train_checkins[:, 0]
    for user_id in range(1, n_users + 1):
        checkins_of_user = train_checkins[user_column == user_id]
        train_user_checkins[user_id] = checkins_of_user
        # Distinct values of column 2 for this user.
        user_location[user_id] = set(
            np.unique(checkins_of_user[:, 2]).tolist())

    offsets = [offset1, offset2, offset3]
    checkins = [
        train_checkins, val_checkins, train_user_checkins, user_location
    ]
    count_nodes = [n_users, n_nodes_total]
    friendships = [friendship_old_ori, friendship_old_persona, friendship_new]
    maps = [maps_PtOri, maps_OritP]

    return offsets, checkins, count_nodes, friendships, maps, train_user_checkins, persona_checkins, center_ori_maps
Ejemplo n.º 5
0
            new_time += 1
    additional_checkins = np.array(additional_checkins)
    print("Num add: {}".format(len(additional_checkins)))
    if len(additional_checkins) > 0:
        selected_checkins = np.concatenate((selected_checkins, additional_checkins), axis=0)
    return selected_checkins

# Script entry point: read the input graph and check-ins, renumber the
# check-ins, optionally split them 80/20 by column 1, then export the data
# for the selected model.
if __name__ == "__main__":
    args = parse_args()
    print(args)
    model = args.model 
    friendship, selected_checkins = read_input(args.dataset_name)
    friendship = friendship.astype(int)
    
    # NOTE(review): `or 1` makes this condition always true, so the "dhne"
    # exclusion is effectively disabled — confirm this is intentional.
    if model.lower() != "dhne" or 1:
        selected_checkins, o1, o2, o3, nt, nu = renumber_checkins(selected_checkins) # from 1 to the end
        if args.POI:
            # First 80% of rows (ascending column 1) are the training part;
            # the remaining rows are the test part.
            n_trains = int(0.8 * len(selected_checkins))
            sorted_time = np.argsort(selected_checkins[:, 1])
            train_indices = sorted_time[:n_trains]
            test_indices = sorted_time[n_trains:]
            test_checkins = selected_checkins[test_indices]
            train_checkins = selected_checkins[train_indices]
            """
            OK! Now we've got all the material we need
            """
        else:
            # Without the POI option every check-in is used for training.
            train_checkins = selected_checkins
        
    if model.lower() == "deepwalk":
        save_deepwalk(friendship, train_checkins, args.dataset_name)