Beispiel #1
0
def analysis():
    """Split the unknown users into "home", "visitor" and "non_visitor" groups.

    A user lands in ``"home"`` when at least one of their check-in locations
    is a key of the mapping returned by
    ``home_recommendation.get_home_location()``.  Every other user is a
    ``"non_visitor"`` when the length of their outer check-in list exceeds the
    module-level ``nonvisitor_threshold``, and a ``"visitor"`` otherwise.

    Returns:
        dict: ``{"home": [...], "visitor": [...], "non_visitor": [...]}``,
        each list holding user ids in the iteration order of the check-in
        data.
    """
    user_data = read_file.read_checkins_file("unknown_user")
    home_dic = home_recommendation.get_home_location()

    result = {"home": [], "visitor": [], "non_visitor": []}

    for user_id, total_location in user_data.items():
        # Dict membership replaces the former scan over every home entry for
        # every single check-in; the outcome is the same.
        has_home = any(
            single_location["location"] in home_dic
            for daily_location in total_location
            for single_location in daily_location
        )
        if has_home:
            result["home"].append(user_id)
        elif len(total_location) > nonvisitor_threshold:
            result["non_visitor"].append(user_id)
        else:
            result["visitor"].append(user_id)

    return result
Beispiel #2
0
def unknownUser_knownLocation():
    """Build the (unknown user x known location) check-in count matrix.

    Row ``i`` belongs to the ``i``-th user in the iteration order of the
    "unknown_user" check-in data; column ``j`` belongs to the ``j``-th entry
    of ``read_file.read_known_location()``.  Each cell counts how often that
    user checked in at that location.  Check-ins at locations that are not in
    the known-location list are ignored.

    Side effects: pretty-prints the per-user location lists and the matrix,
    and writes ``b.txt`` and ``matrix.txt`` to the current directory.

    Returns:
        numpy.ndarray: the count matrix that was written to ``matrix.txt``.
    """
    # Original note: 100 unknown users with 533736 known locations.
    unknown_user_data = read_file.read_checkins_file("unknown_user")

    new_dic = {}
    for keyname, valuename in unknown_user_data.items():
        # Build a fresh list per user.  Sharing one list across the loop made
        # every user point at the combined check-ins of all users.
        new_dic[keyname] = [
            eachvalue["location"]
            for dailyvalue in valuename
            for eachvalue in dailyvalue
        ]

    pprint.pprint(new_dic)

    known_loc = read_file.read_known_location()

    # Column lookup table.  setdefault keeps the first position of a repeated
    # entry, matching what known_loc.index() would return.
    # Assumes location ids are hashable (e.g. strings) -- TODO confirm.
    loc_index = {}
    for position, loc in enumerate(known_loc):
        loc_index.setdefault(loc, position)

    # A 2-tuple shape gives a 2-D array (a 3-tuple such as (2, 3, 4) would
    # give a 3-D one).
    unknownUser_knownLoc = (len(unknown_user_data), len(known_loc))
    maxtrix_unknownUser_knownLoc = np.zeros(unknownUser_knownLoc)

    # new_dic preserves the key order of unknown_user_data, so the enumerate
    # position is the same index list(keys()).index(username) would yield.
    for userindex, localist in enumerate(new_dic.values()):
        for location in localist:
            locationindex = loc_index.get(location)
            if locationindex is None:
                # Location is not among the known locations; skip it.
                continue
            maxtrix_unknownUser_knownLoc[userindex, locationindex] += 1
    pprint.pprint(maxtrix_unknownUser_knownLoc)

    # NOTE(review): b.txt looks like leftover debug output; kept so the
    # function's file side effects stay unchanged.
    a = np.array([1, 2, 3, 4, 5])
    np.savetxt("b.txt", a)
    np.savetxt("matrix.txt", maxtrix_unknownUser_knownLoc)
    return maxtrix_unknownUser_knownLoc
Beispiel #3
0
def knownUser_knownLocation():
    """Build the (known user x known location) check-in count matrix.

    Row ``i`` belongs to the ``i``-th user in the iteration order of the
    "known_user" check-in data; column ``j`` belongs to the ``j``-th entry of
    ``read_file.read_known_location()``.  Each cell counts how often that user
    checked in at that location.  Check-ins at locations that are not in the
    known-location list are ignored.

    Side effects: pretty-prints the number of users, the per-user location
    lists and the freshly allocated (all-zero) matrix.

    Returns:
        numpy.ndarray: the filled count matrix.
    """
    # Original note: 3884 known users with 533736 known locations.
    known_user_data = read_file.read_checkins_file("known_user")
    pprint.pprint(len(known_user_data))

    new_dic = {}
    for keyname, valuename in known_user_data.items():
        # Build a fresh list per user.  Sharing one list across the loop made
        # every user point at the combined check-ins of all users.
        new_dic[keyname] = [
            eachvalue["location"]
            for dailyvalue in valuename
            for eachvalue in dailyvalue
        ]

    pprint.pprint(new_dic)

    known_loc = read_file.read_known_location()

    # Column lookup table.  setdefault keeps the first position of a repeated
    # entry, matching what known_loc.index() would return.
    # Assumes location ids are hashable (e.g. strings) -- TODO confirm.
    loc_index = {}
    for position, loc in enumerate(known_loc):
        loc_index.setdefault(loc, position)

    knownUser_knownLoc = (len(known_user_data), len(known_loc))
    maxtrix_knownUser_knownLoc = np.zeros(knownUser_knownLoc)
    # Print the zero matrix itself.  The previous code passed the float matrix
    # to np.zeros() as if it were a shape, which raises TypeError.
    pprint.pprint(maxtrix_knownUser_knownLoc)

    # new_dic preserves the key order of known_user_data, so the enumerate
    # position is the same index list(keys()).index(username) would yield.
    for userindex, localist in enumerate(new_dic.values()):
        for location in localist:
            locationindex = loc_index.get(location)
            if locationindex is None:
                # Location is not among the known locations; skip it.
                continue
            maxtrix_knownUser_knownLoc[userindex, locationindex] += 1

    # Returning the matrix makes the result usable; it used to be discarded.
    return maxtrix_knownUser_knownLoc
Beispiel #4
0
def recommend():
    """Run the average-location pipeline for the unknown users.

    Loads the "unknown_user" check-ins and the location file, feeds both to
    ``get_average_location``, passes that result together with the location
    info to ``compute_distance``, and returns whatever
    ``rank_candidate_location`` produces from the distances.
    """
    checkins = read_file.read_checkins_file('unknown_user')
    locations = read_file.read_location_file()
    averages = get_average_location(checkins, locations)
    distances = compute_distance(averages, locations)
    return rank_candidate_location(distances)
Beispiel #5
0
def recommend():
    """Give every unknown user the same candidate list.

    Returns:
        dict: user id -> new list holding the items yielded by iterating the
        result of ``candidate_popularity()``, in that order.
    """
    checkins_by_user = read_file.read_checkins_file("unknown_user")
    candidates = candidate_popularity()
    # Each user receives an independent copy rather than a shared list object.
    return {user: list(candidates) for user in checkins_by_user.keys()}
def get_user_home(home_dict):
    """Map unknown users to the ``home_dict`` entry of their home location.

    Args:
        home_dict: mapping whose keys are handed to ``have_home`` as the list
            of home locations and whose values are stored in the result.

    Returns:
        dict: user name -> ``home_dict[home_location]`` for every user for
        whom ``have_home`` returned a truthy location; other users are left
        out.
    """
    checkins_by_user = read_file.read_checkins_file('unknown_user')
    known_homes = list(home_dict.keys())

    homes_by_user = {}
    for user, checkins in checkins_by_user.items():
        matched_home = have_home(checkins, known_homes)
        if not matched_home:
            # No home location found for this user.
            continue
        homes_by_user[user] = home_dict[matched_home]
    return homes_by_user
Beispiel #7
0
def get_unknown_user():
    """Collect, per unknown user, the times of check-ins whose location is "?".

    Returns:
        dict: user id -> list of the ``"time"`` values of every check-in
        whose ``"location"`` equals ``"?"``, in file order.  Users without
        such check-ins map to an empty list.
    """
    checkins_by_user = read_file.read_checkins_file("unknown_user")
    return {
        user_id: [
            checkin["time"]
            for day in days
            for checkin in day
            if checkin["location"] == "?"
        ]
        for user_id, days in checkins_by_user.items()
    }
Beispiel #8
0
def total_popularity():
    """Count how many check-ins each location received across all users.

    Reads the check-in data via ``read_file.read_checkins_file()`` (called
    without arguments) and tallies the ``"location"`` value of every check-in.

    Returns:
        dict: location id -> number of check-ins, keyed in order of first
        appearance.
    """
    read_data = read_file.read_checkins_file()
    popularity = {}
    for daily_data in read_data.values():
        for daily_location in daily_data:
            for location_data in daily_location:
                location = location_data["location"]
                popularity[location] = popularity.get(location, 0) + 1

    return popularity
Beispiel #9
0
def knownUser_unKnownLocation():
    # 3884 known users with 100 unknown location
    new_dic = {}
    known_user_data = read_file.read_checkins_file("known_user")
    known_user_list = list(known_user_data.keys())
    loc_list = []
    pprint.pprint("test")

    for keyname, valuename in known_user_data.items():
        for dailyvalue in valuename:
            for eachvalue in dailyvalue:
                eachloc = eachvalue["location"]
                loc_list.append(eachloc)
        new_dic[keyname] = loc_list
    pprint.pprint("test2")
    ##pprint.pprint(new_dic)

    pprint.pprint("test3")

    known_loc = read_file.read_location_file()
    pprint.pprint("test4")

    user_dict = {}

    for keyname, listvalue in new_dic.items():
        user_dict[keyname] = {}
        for locKey in known_loc.keys():

            user_dict[keyname][locKey] = 0
        for value in listvalue:
            user_dict[keyname][value] += 1
            pprint.pprint("loop2")
    pprint.pprint(user_dict["9448"])

    unknown_loc = read_file.read_candidate_file()
    knownUser_unknownLoc = (len(known_user_data), len(unknown_loc)
                            )  #(2,3,4)变为3维
    maxtrix_knownUser_unknownLoc = np.zeros(knownUser_unknownLoc)
    #pprint.pprint(np.zeros(knownUser_unknownLoc))
    ##pprint.pprint("build zero matrix")
    '''
Beispiel #10
0
def get_person_location_type():
    """Count, per unknown user, how often each location type was visited.

    Check-ins whose location is ``"?"`` are skipped.  For the others the type
    is looked up as ``location_data[location_id]["type"]``.  A type containing
    two consecutive ``\\x1a`` characters is replaced by ``"Café"``
    (presumably repairing a mis-encoded "é" -- confirm against the data file).

    Returns:
        dict: user id -> {location type -> number of visits}, with types keyed
        in order of first appearance.  Users without any labelled check-in map
        to an empty dict.

    Raises:
        KeyError: if a check-in references a location id that is missing from
            the location file.
    """
    user_data = read_file.read_checkins_file("unknown_user")
    location_data = read_file.read_location_file()

    user_location_dic = {}
    for user_id, user_location in user_data.items():
        # Tally in a single pass instead of collecting a list and calling
        # list.count() once per element afterwards.
        type_counts = {}
        for daily_location in user_location:
            for location in daily_location:
                location_id = location["location"]
                if location_id == "?":
                    continue
                location_type = location_data[location_id]["type"]
                if "\x1a\x1a" in location_type:
                    location_type = "Café"
                type_counts[location_type] = type_counts.get(location_type, 0) + 1
        user_location_dic[user_id] = type_counts

    return user_location_dic
def _flatten_locations(checkins_by_user):
    """Map each user id to the flat list of location ids over all their days."""
    return {
        user_id: [
            checkin["location"]
            for daily_checkins in total_checkins
            for checkin in daily_checkins
        ]
        for user_id, total_checkins in checkins_by_user.items()
    }


def recommend(k=5):
    """Rank candidate locations for each unknown user via nearest known users.

    For every unknown user:

    1. Each known user gets a similarity score: the number of the unknown
       user's check-ins (repeats included) whose location also occurs among
       that known user's check-ins.
    2. The ``k`` known users with the highest score are kept.  Ties are broken
       by the larger user id, because ``(score, user_id)`` pairs are sorted in
       descending order.
    3. Each candidate location is scored by how many check-ins those
       neighbours made there, and the candidates are returned sorted by
       ``(score, candidate_id)`` in descending order.

    Args:
        k: number of nearest known users to consult (default 5, the value
            that used to be hard-coded).  If fewer known users exist, all of
            them are used instead of raising ``IndexError``.

    Returns:
        dict: unknown user id -> list of all candidate ids, best first.
    """
    from collections import Counter

    # Original note: 3884 known users with 100 unknown locations.
    known_locations = _flatten_locations(
        read_file.read_checkins_file("known_user"))
    unknown_locations = _flatten_locations(
        read_file.read_checkins_file("unknown_user"))

    # Per known user: location -> visit count.  Gives O(1) membership tests
    # for the similarity step and O(1) counts for the candidate step.
    # Assumes location ids are hashable (e.g. strings) -- TODO confirm.
    known_counts = {
        user_id: Counter(locations)
        for user_id, locations in known_locations.items()
    }

    knn_rank_dic = {}
    for cur_userid, cur_loclist in unknown_locations.items():
        cur_counts = Counter(cur_loclist)
        ranked = sorted(
            (
                (
                    sum(
                        times
                        for location, times in cur_counts.items()
                        if location in next_counts
                    ),
                    next_userid,
                )
                for next_userid, next_counts in known_counts.items()
            ),
            reverse=True,
        )
        # Slicing tolerates fewer than k known users.
        knn_rank_dic[cur_userid] = [user_id for _, user_id in ranked[:k]]

    # Materialise once and drop repeats; the candidate id is used as a dict
    # key downstream, so repeats never produced separate entries anyway.
    candidates = list(dict.fromkeys(read_file.read_candidate_file()))

    result = {}
    for user_id, neighbours in knn_rank_dic.items():
        scored = sorted(
            (
                (
                    sum(known_counts[neighbour][candidate]
                        for neighbour in neighbours),
                    candidate,
                )
                for candidate in candidates
            ),
            reverse=True,
        )
        result[user_id] = [candidate for _, candidate in scored]

    return result