def __init__(self, usr_loc_hist):
    """Build the per-user statistics derived from a location history.

    Args:
        usr_loc_hist: table of check-in records; it is indexed by the
            columns 'user_id', 'business_id' and 'venue_category_name'
            and must support ``groupby`` (presumably a pandas
            DataFrame -- confirm against the caller).
    """
    # load categories hierarchy
    self.ch = hc.load()

    # number of users in system
    self.n_users = len(usr_loc_hist['user_id'].unique())

    # user_venue count matrix: rows are users, columns are businesses
    self.uv_cnt = pd.crosstab(usr_loc_hist['user_id'],
                              usr_loc_hist['business_id'])

    # user location history, grouped by category name.
    # ``groupby`` is a method and has to be called; subscripting it
    # (``groupby[...]``) raises TypeError at construction time.
    self.usr_loc_hist = usr_loc_hist.groupby('venue_category_name')
def train(events):
    """Replay check-in events through a ``ContainerMAB`` and record rewards.

    One arm is created per key of the categories hierarchy returned by
    ``ch.load()``. For every event the bandit picks an arm given the hour
    of the event's 'datetime', the pick is scored against the event's
    'venue_category_id', and the bandit is updated with that reward.

    Args:
        events: table of events exposing ``shape[0]`` and ``iterrows()``,
            whose rows carry 'datetime' (an object with an ``hour``
            attribute) and 'venue_category_id'. Presumably a pandas
            DataFrame -- confirm against the caller.

    Returns:
        tuple ``(mab, stats)`` where ``mab`` is the trained bandit and
        ``stats`` is a dict with keys 'n_trials', 'pulled_arms',
        'rewards' and 'cumulative_rewards' (the last three are lists with
        one entry per event).
    """
    # Load Categories Hierarchy
    # Single-argument print calls behave the same as a Python 2 print
    # statement and as the Python 3 print function.
    print('%s %s' % ('Loading Categories Hierarchy', datetime.now()))
    h = ch.load()

    # Materialise the keys so an arm index can be mapped back to a
    # category even when keys() returns a non-indexable view.
    categories = list(h.keys())
    mab = ContainerMAB(n_arms=len(categories), n_bins=24)

    n_trials = events.shape[0]
    returned_cate = [None] * n_trials
    rewards = [0.0] * n_trials
    cumulative_rewards = [0.0] * n_trials

    # Report progress about every 10% of the events. The step is clamped
    # to 1 so that fewer than 10 events cannot cause a modulo by zero.
    progress_step = max(1, n_trials // 10)
    running_total = 0.0

    for cnt, (_, r) in enumerate(events.iterrows()):
        if cnt % progress_step == 0:
            print('%s %s %s %s' % (' Progress:', cnt * 100 // n_trials,
                                   '%', datetime.now()))

        t = r['datetime']
        hour = int(t.hour)
        cate_idx = mab.select_arms(hour)
        category = categories[cate_idx]
        returned_cate[cnt] = cate_idx

        # Compute Reward
        v_category = r['venue_category_id']
        if v_category in h:
            dist = h.dist_to_LCA(category, v_category, 0)
            # 3.0 forces true division: with an integer distance, Python 2
            # floor division would collapse the reward to 0 or 1.
            # NOTE(review): 3 is presumably the largest possible distance
            # in the hierarchy -- confirm.
            reward = 1 - (dist / 3.0) ** 2
        else:
            # Categories unknown to the hierarchy earn nothing.
            reward = 0.0
        mab.update(hour, cate_idx, reward)

        # Compute Cumulative Reward
        rewards[cnt] = reward
        running_total += reward
        cumulative_rewards[cnt] = running_total

    stats = {
        'n_trials': n_trials,
        'pulled_arms': returned_cate,
        'rewards': rewards,
        'cumulative_rewards': cumulative_rewards,
    }
    return mab, stats
def train(events):
    """Replay check-in events through a ``ContainerMAB`` and record rewards.

    One arm is created per key of the categories hierarchy returned by
    ``ch.load()``. For every event the bandit picks an arm given the hour
    of the event's 'datetime', the pick is scored against the event's
    'venue_category_id', and the bandit is updated with that reward.

    Args:
        events: table of events exposing ``shape[0]`` and ``iterrows()``,
            whose rows carry 'datetime' (an object with an ``hour``
            attribute) and 'venue_category_id'. Presumably a pandas
            DataFrame -- confirm against the caller.

    Returns:
        tuple ``(mab, stats)`` where ``mab`` is the trained bandit and
        ``stats`` is a dict with keys 'n_trials', 'pulled_arms',
        'rewards' and 'cumulative_rewards' (the last three are lists with
        one entry per event).
    """
    # Load Categories Hierarchy
    # Single-argument print calls behave the same as a Python 2 print
    # statement and as the Python 3 print function.
    print('%s %s' % ('Loading Categories Hierarchy', datetime.now()))
    h = ch.load()

    # Materialise the keys so an arm index can be mapped back to a
    # category even when keys() returns a non-indexable view.
    categories = list(h.keys())
    mab = ContainerMAB(n_arms=len(categories), n_bins=24)

    n_trials = events.shape[0]
    returned_cate = [None] * n_trials
    rewards = [0.0] * n_trials
    cumulative_rewards = [0.0] * n_trials

    # Report progress about every 10% of the events. The step is clamped
    # to 1 so that fewer than 10 events cannot cause a modulo by zero.
    progress_step = max(1, n_trials // 10)
    running_total = 0.0

    for cnt, (_, r) in enumerate(events.iterrows()):
        if cnt % progress_step == 0:
            print('%s %s %s %s' % (' Progress:', cnt * 100 // n_trials,
                                   '%', datetime.now()))

        t = r['datetime']
        hour = int(t.hour)
        cate_idx = mab.select_arms(hour)
        category = categories[cate_idx]
        returned_cate[cnt] = cate_idx

        # Compute Reward
        v_category = r['venue_category_id']
        if v_category in h:
            dist = h.dist_to_LCA(category, v_category, 0)
            # 3.0 forces true division: with an integer distance, Python 2
            # floor division would collapse the reward to 0 or 1.
            # NOTE(review): 3 is presumably the largest possible distance
            # in the hierarchy -- confirm.
            reward = 1 - (dist / 3.0) ** 2
        else:
            # Categories unknown to the hierarchy earn nothing.
            reward = 0.0
        mab.update(hour, cate_idx, reward)

        # Compute Cumulative Reward
        rewards[cnt] = reward
        running_total += reward
        cumulative_rewards[cnt] = running_total

    stats = {
        'n_trials': n_trials,
        'pulled_arms': returned_cate,
        'rewards': rewards,
        'cumulative_rewards': cumulative_rewards,
    }
    return mab, stats
def __init__(self, usr_loc_hist):
    """Set up the model state from a location history and train it.

    Args:
        usr_loc_hist: table of check-in records; it is indexed by the
            columns 'user_id', 'venue_id' and 'venue_category_id' and
            must support ``groupby`` (presumably a pandas DataFrame --
            confirm against the caller).
    """
    # Categories hierarchy, loaded before anything is read from the input.
    self.ch = hc.load()

    user_column = usr_loc_hist['user_id']
    distinct_users = user_column.unique()

    # How many distinct users appear in the history.
    self.n_users = len(distinct_users)

    # Cross-tabulated counts: one row per user, one column per venue.
    venue_column = usr_loc_hist['venue_id']
    self.uv_cnt = pd.crosstab(user_column, venue_column)

    # The history itself, grouped by category id.
    self.usr_loc_hist = usr_loc_hist.groupby(['venue_category_id'])

    # Containers that start empty: venues in categories, user's
    # preference weight and category experts (presumably filled by
    # ``offline`` -- not visible from here).
    self.venues = dict()
    self.ucw = dict()
    self.experts = dict()

    # Train
    self.offline()
import pickle import pandas as pd import categories_hierarchy as hc import json # Phoenix_rating_hist,LV_rating_hist = pickle.load(open('user_rating_hist_after_filter.p','rb')) hierarchy = hc.load() file = open('/Users/xiwang/git-code/Dataset/poi_data/yelp_dataset_round11/business.json', 'r') category_alias_map = {} with open('./data/categories.json', 'rb') as f: categories = json.load(f) for category in categories: category_alias_map[category['title']] = category['alias'] f.close() location_category = {} unmatched_cat = set() count_business = 0 count_no_cat_business = 0 for line in file.readlines(): location = json.loads(line) min_level = 10 count_business += 1 if len(location['categories']) > 0: general_cat = ' ' for category in location['categories']: if category in category_alias_map and hierarchy.get_level_num(category_alias_map[category]) < min_level: general_cat = category_alias_map[category] else: # print('No Match Category Exception: ', category)