def analysis():
    """Split the unknown users into "home", "visitor" and "non_visitor" groups.

    A user is put in "home" when at least one of their check-in locations is
    a key of the mapping returned by ``home_recommendation.get_home_location()``.
    Every other user is put in "non_visitor" when the length of their
    check-in record exceeds ``nonvisitor_threshold`` (a name defined outside
    this function), and in "visitor" otherwise.

    Returns:
        dict: ``{"home": [...], "visitor": [...], "non_visitor": [...]}``;
        each list holds user ids in the iteration order of the check-ins
        mapping, and every user appears in exactly one list.
    """
    # Indexed below as: user id -> iterable of days -> iterable of check-in
    # dicts that carry a "location" key.
    user_data = read_file.read_checkins_file("unknown_user")
    # Only the keys of this mapping are used (as home location ids).
    home_dic = home_recommendation.get_home_location()

    result = {"home": [], "visitor": [], "non_visitor": []}
    for user_id, total_location in user_data.items():
        # Direct key lookup instead of scanning every home location for
        # every check-in of every user.
        visited_home = any(
            single_location["location"] in home_dic
            for daily_location in total_location
            for single_location in daily_location
        )
        if visited_home:
            result["home"].append(user_id)
        elif len(total_location) > nonvisitor_threshold:
            result["non_visitor"].append(user_id)
        else:
            result["visitor"].append(user_id)
    return result
def unknownUser_knownLocation():
    """Build and save the (unknown user x known location) check-in count matrix.

    Row ``i`` belongs to the ``i``-th user of the "unknown_user" check-ins
    mapping and column ``j`` to the ``j``-th entry returned by
    ``read_file.read_known_location()``. Each cell counts how many of that
    user's check-ins carry that location; check-ins whose location is not in
    the known-location sequence are skipped.

    Side effects:
        Pretty-prints the per-user location lists and the finished matrix,
        and writes "b.txt" and "matrix.txt" in the current directory via
        ``np.savetxt``.

    Returns:
        None
    """
    # 100 unknown users with 533736 known location
    unknown_user_data = read_file.read_checkins_file("unknown_user")

    # Each user gets its own list. The previous version reused one list for
    # all users, so every row ended up counting every user's check-ins.
    new_dic = {}
    for keyname, valuename in unknown_user_data.items():
        new_dic[keyname] = [
            eachvalue["location"]
            for dailyvalue in valuename
            for eachvalue in dailyvalue
        ]
    pprint.pprint(new_dic)

    known_loc = read_file.read_known_location()
    # Position of the first occurrence of every known location, i.e. what
    # known_loc.index() would return, without a linear scan per check-in.
    location_to_column = {}
    for column, location in enumerate(known_loc):
        location_to_column.setdefault(location, column)

    # 2-D shape (users, locations); a 3-tuple such as (2, 3, 4) would give a
    # 3-D array instead.
    unknownUser_knownLoc = (len(unknown_user_data), len(known_loc))
    maxtrix_unknownUser_knownLoc = np.zeros(unknownUser_knownLoc)

    # new_dic was filled in the iteration order of unknown_user_data, so
    # enumerate() yields the same row index the key list would.
    for userindex, localist in enumerate(new_dic.values()):
        for location in localist:
            locationindex = location_to_column.get(location)
            if locationindex is None:
                # Location is not one of the known locations.
                continue
            maxtrix_unknownUser_knownLoc[userindex, locationindex] += 1
    pprint.pprint(maxtrix_unknownUser_knownLoc)

    # NOTE(review): "b.txt" looks like a leftover savetxt experiment; it is
    # kept so the function's file output is unchanged. Confirm and remove.
    a = np.array([1, 2, 3, 4, 5])
    np.savetxt("b.txt", a)
    np.savetxt("matrix.txt", maxtrix_unknownUser_knownLoc)
def knownUser_knownLocation():
    """Build the (known user x known location) check-in count matrix.

    Row ``i`` belongs to the ``i``-th user of the "known_user" check-ins
    mapping and column ``j`` to the ``j``-th entry returned by
    ``read_file.read_known_location()``. Each cell counts how many of that
    user's check-ins carry that location; check-ins whose location is not in
    the known-location sequence are skipped.

    Side effects:
        Pretty-prints the number of known users, the per-user location lists
        and the freshly allocated all-zero matrix.

    Returns:
        numpy.ndarray: the filled count matrix. The previous version built
        the matrix and then discarded it.
    """
    # 3884 known users with 533736 known location
    known_user_data = read_file.read_checkins_file("known_user")
    pprint.pprint(len(known_user_data))

    # Each user gets its own list. The previous version reused one list for
    # all users, so every row ended up counting every user's check-ins.
    new_dic = {}
    for keyname, valuename in known_user_data.items():
        new_dic[keyname] = [
            eachvalue["location"]
            for dailyvalue in valuename
            for eachvalue in dailyvalue
        ]
    pprint.pprint(new_dic)

    known_loc = read_file.read_known_location()
    # Position of the first occurrence of every known location, i.e. what
    # known_loc.index() would return, without a linear scan per check-in.
    location_to_column = {}
    for column, location in enumerate(known_loc):
        location_to_column.setdefault(location, column)

    knownUser_knownLoc = (len(known_user_data), len(known_loc))
    maxtrix_knownUser_knownLoc = np.zeros(knownUser_knownLoc)
    # The previous code called np.zeros() on the matrix itself, which is not
    # a valid shape argument and raised before any counting happened.
    pprint.pprint(maxtrix_knownUser_knownLoc)

    # new_dic was filled in the iteration order of known_user_data, so
    # enumerate() yields the same row index the key list would.
    for userindex, localist in enumerate(new_dic.values()):
        for location in localist:
            locationindex = location_to_column.get(location)
            if locationindex is None:
                # Location is not one of the known locations.
                continue
            maxtrix_knownUser_knownLoc[userindex, locationindex] += 1
    return maxtrix_knownUser_knownLoc
def recommend():
    """Run the average-location / distance / ranking pipeline for unknown users.

    Loads the "unknown_user" check-ins and the location file, then chains
    ``get_average_location`` -> ``compute_distance`` ->
    ``rank_candidate_location`` (all defined outside this function).

    Returns:
        Whatever ``rank_candidate_location`` returns for the computed
        distances.
    """
    checkins = read_file.read_checkins_file('unknown_user')
    locations = read_file.read_location_file()

    averages = get_average_location(checkins, locations)
    distances = compute_distance(averages, locations)
    return rank_candidate_location(distances)
def recommend():
    """Give every unknown user the candidates produced by ``candidate_popularity()``.

    Returns:
        dict: user id -> a new list holding the items obtained by iterating
        the ``candidate_popularity()`` result, in that iteration order.
    """
    checkins_by_user = read_file.read_checkins_file("unknown_user")
    popular_candidates = candidate_popularity()
    # list() builds a separate list per user, just like appending one
    # candidate at a time to a fresh list.
    return {user: list(popular_candidates) for user in checkins_by_user.keys()}
def get_user_home(home_dict):
    """Look up a home entry for every unknown user that has one.

    Args:
        home_dict: mapping whose keys are passed to ``have_home`` as the
            list of home locations and whose values are returned per user.

    Returns:
        dict: user name -> ``home_dict[home_location]`` for each user where
        ``have_home(user_check_ins, home_keys)`` returned a truthy key.
        Users with a falsy result are left out.
    """
    checkins_by_user = read_file.read_checkins_file('unknown_user')
    home_keys = list(home_dict.keys())

    homes_by_user = {}
    for user, user_checkins in checkins_by_user.items():
        matched_home = have_home(user_checkins, home_keys)
        if not matched_home:
            continue
        homes_by_user[user] = home_dict[matched_home]
    return homes_by_user
def get_unknown_user():
    """Collect, per unknown user, the times of check-ins whose location is "?".

    Returns:
        dict: user id -> list of the "time" values of that user's check-ins
        with ``location == "?"``, in day order and then check-in order.
        Users without such check-ins map to an empty list.
    """
    checkins_by_user = read_file.read_checkins_file("unknown_user")
    return {
        user: [
            check_in["time"]
            for day in days
            for check_in in day
            if check_in["location"] == "?"
        ]
        for user, days in checkins_by_user.items()
    }
def total_popularity():
    """Count how many check-ins each location has across all users.

    Reads the check-ins through ``read_file.read_checkins_file()`` (called
    with no argument) and walks user -> day -> check-in.

    Returns:
        dict: location value -> number of check-ins carrying that location,
        with keys in order of first appearance.
    """
    popularity = {}
    for daily_data in read_file.read_checkins_file().values():
        for daily_location in daily_data:
            for location_data in daily_location:
                location = location_data["location"]
                popularity[location] = popularity.get(location, 0) + 1
    return popularity
def knownUser_unKnownLocation():
    """Count known users' check-ins per location and allocate the candidate matrix.

    For every user in the "known_user" check-ins a dict is built that maps
    each key of ``read_file.read_location_file()`` to the number of that
    user's check-ins at that location. A zero matrix of shape
    (number of known users, number of candidates) is then allocated.

    Side effects:
        Pretty-prints the progress markers "test", "test2", "test3",
        "test4" and "loop2", and the counts of user "9448" when present.

    Returns:
        None
    """
    # 3884 known users with 100 unknown location
    known_user_data = read_file.read_checkins_file("known_user")
    pprint.pprint("test")

    # Each user gets its own list. The previous version reused one list for
    # all users, so every user ended up with every user's check-ins.
    new_dic = {}
    for keyname, valuename in known_user_data.items():
        new_dic[keyname] = [
            eachvalue["location"]
            for dailyvalue in valuename
            for eachvalue in dailyvalue
        ]
    pprint.pprint("test2")
    pprint.pprint("test3")

    known_loc = read_file.read_location_file()
    pprint.pprint("test4")

    user_dict = {}
    for keyname, listvalue in new_dic.items():
        # Start every location listed in the location file at zero.
        location_counts = dict.fromkeys(known_loc, 0)
        for value in listvalue:
            # Skip check-ins at locations missing from the location file
            # instead of failing with KeyError.
            if value in location_counts:
                location_counts[value] += 1
        user_dict[keyname] = location_counts
    pprint.pprint("loop2")

    # Debug peek at one hard-coded user; guarded so a data set without that
    # user does not abort the run.
    if "9448" in user_dict:
        pprint.pprint(user_dict["9448"])

    unknown_loc = read_file.read_candidate_file()
    # 2-D shape (users, candidates); a 3-tuple such as (2, 3, 4) would give
    # a 3-D array instead.
    knownUser_unknownLoc = (len(known_user_data), len(unknown_loc))
    # NOTE(review): the matrix is allocated but never filled or returned
    # here. The original line ended with an unmatched ''' (presumably the
    # start of commented-out code), which has been dropped.
    maxtrix_knownUser_unknownLoc = np.zeros(knownUser_unknownLoc)
def get_person_location_type():
    """Count, per unknown user, how often each location type was visited.

    Check-ins whose location is "?" are ignored. For every other check-in
    the type is read from ``location_data[location_id]["type"]``; any type
    containing "\\x1a\\x1a" is replaced by "Café".

    Returns:
        dict: user id -> dict mapping location type -> number of check-ins
        of that type, with types in order of first appearance. Users
        without any resolvable check-in map to an empty dict.

    Raises:
        KeyError: if a check-in location other than "?" is missing from the
            location file data.
    """
    user_data = read_file.read_checkins_file("unknown_user")
    location_data = read_file.read_location_file()

    user_location_dic = {}
    for user_id, user_location in user_data.items():
        # Tally in a single pass instead of collecting a list and calling
        # list.count() once per element.
        type_counts = {}
        for daily_location in user_location:
            for location in daily_location:
                location_id = location["location"]
                if location_id == "?":
                    continue
                location_type = location_data[location_id]["type"]
                # NOTE(review): the doubled 0x1A bytes are presumably a
                # mangled "é" in the source data; confirm.
                if "\x1a\x1a" in location_type:
                    location_type = "Café"
                type_counts[location_type] = type_counts.get(location_type, 0) + 1
        user_location_dic[user_id] = type_counts
    return user_location_dic
def _knn_flatten_locations(checkins_by_user):
    """Map each user id to the flat list of "location" values of its check-ins."""
    return {
        user_id: [
            check_in["location"]
            for day in days
            for check_in in day
        ]
        for user_id, days in checkins_by_user.items()
    }


def _knn_rank_descending(score_by_key):
    """Return the keys ordered by (score, key), highest first."""
    ranked = sorted(
        ((score, key) for key, score in score_by_key.items()),
        reverse=True,
    )
    return [key for _score, key in ranked]


def recommend(k=5):
    """Rank candidate locations for each unknown user via its nearest known users.

    Similarity between an unknown and a known user is the number of the
    unknown user's check-ins whose location also occurs among the known
    user's check-ins. The ``k`` most similar known users (ties broken by
    higher user id) are the neighbours. Each candidate location is scored
    by how many neighbour check-ins it has, and candidates are returned
    from highest to lowest score (ties broken by higher candidate id).

    Args:
        k: number of neighbours per unknown user. Defaults to 5, the value
            that used to be hard-coded. When fewer known users exist, all
            of them are used instead of raising IndexError.

    Returns:
        dict: unknown user id -> list of all candidate ids, best first.
    """
    # 3884 known users with 100 unknown location
    known_user_data = read_file.read_checkins_file("known_user")
    unknown_user_data = read_file.read_checkins_file("unknown_user")

    new_dic = _knn_flatten_locations(known_user_data)
    unknown_dic = _knn_flatten_locations(unknown_user_data)

    # Sets make the per-check-in membership test constant time; duplicates
    # on the known side never influenced the similarity count.
    known_location_sets = {
        user_id: set(locations) for user_id, locations in new_dic.items()
    }

    knn_rank_dic = {}
    for cur_userid, cur_loclist in unknown_dic.items():
        rank_user_sim = {
            next_userid: sum(1 for each_loc in cur_loclist if each_loc in next_locset)
            for next_userid, next_locset in known_location_sets.items()
        }
        knn_rank_dic[cur_userid] = _knn_rank_descending(rank_user_sim)[:k]

    candidate_data = read_file.read_candidate_file()

    result = {}
    for user_id, neighbours in knn_rank_dic.items():
        # Tally every neighbour check-in once, then read the candidates'
        # totals from the tally.
        neighbour_visits = {}
        for neighbour_id in neighbours:
            for user_location in new_dic[neighbour_id]:
                neighbour_visits[user_location] = (
                    neighbour_visits.get(user_location, 0) + 1
                )
        candidate_scores = {
            candidate: neighbour_visits.get(candidate, 0)
            for candidate in candidate_data
        }
        result[user_id] = _knn_rank_descending(candidate_scores)
    return result