Exemple #1
0
 def __init__(self, usr_loc_hist):
     """Build the lookup structures derived from a user location history.

     Args:
         usr_loc_hist: table indexed by the column labels ``'user_id'``,
             ``'business_id'`` and ``'venue_category_name'`` (presumably a
             pandas DataFrame -- confirm against the caller).
     """
     # load categories hierarchy
     self.ch = hc.load()
     # number of users in system
     self.n_users = len(usr_loc_hist['user_id'].unique())
     # user_venue count matrix
     self.uv_cnt = pd.crosstab(usr_loc_hist['user_id'],
                               usr_loc_hist['business_id'])
     # user location history, grouped by venue category name.
     # groupby is a method and has to be called; subscripting it
     # (groupby[...]) raises TypeError.
     self.usr_loc_hist = usr_loc_hist.groupby('venue_category_name')
Exemple #2
0
def train(events):
    """Replay ``events`` through a ``ContainerMAB`` and record the rewards.

    One arm is created per key of the categories hierarchy and 24 bins are
    requested (the bin passed on every call is the event's hour). For each
    event the bandit selects an arm for that hour, the selected category is
    scored against the event's venue category, and the bandit is updated
    with the resulting reward.

    Args:
        events: table exposing ``shape`` and ``iterrows()`` (presumably a
            pandas DataFrame -- confirm against the caller). Every row must
            provide ``'datetime'`` (an object with an ``hour`` attribute)
            and ``'venue_category_id'``.

    Returns:
        A tuple ``(mab, stats)``: the updated bandit and a dict with the
        keys ``'n_trials'``, ``'pulled_arms'``, ``'rewards'`` and
        ``'cumulative_rewards'`` (the three lists have one entry per event).
    """
    # Load Categories Hierarchy
    print('Loading Categories Hierarchy %s' % datetime.now())
    h = ch.load()
    # Materialise the keys so they can be indexed by arm number even when
    # keys() returns a view rather than a list.
    categories = list(h.keys())
    mab = ContainerMAB(n_arms=len(categories), n_bins=24)

    n_trials = events.shape[0]
    returned_cate = [None] * n_trials
    rewards = [0.0] * n_trials
    cumulative_rewards = [0.0] * n_trials

    progress = n_trials
    # Report roughly every 10% of the events. The step is clamped to 1 so
    # that fewer than 10 events do not cause a modulo-by-zero.
    report_every = max(progress // 10, 1)
    running_total = 0
    for cnt, (r_ind, r) in enumerate(events.iterrows()):
        if cnt % report_every == 0:
            print('    Progress: %s %% %s'
                  % (cnt * 100 // progress, datetime.now()))

        t = r['datetime']
        cate_idx = mab.select_arms(int(t.hour))
        category = categories[cate_idx]

        returned_cate[cnt] = cate_idx

        # Compute Reward
        v_category = r['venue_category_id']
        if v_category in h:
            distance = h.dist_to_LCA(category, v_category, 0)
            # Divide by 3.0 to force true division: if dist_to_LCA returns
            # an int, Python 2 would floor the quotient before squaring.
            reward = 1 - (distance / 3.0) ** 2
        else:
            # Venue category unknown to the hierarchy: no reward.
            reward = 0
        mab.update(t.hour, cate_idx, reward)

        # Compute Cumulative Reward
        running_total = running_total + reward
        rewards[cnt] = reward
        cumulative_rewards[cnt] = running_total

    stats = {
        'n_trials': n_trials,
        'pulled_arms': returned_cate,
        'rewards': rewards,
        'cumulative_rewards': cumulative_rewards,
    }
    return mab, stats
Exemple #3
0
def train(events):
    """Replay ``events`` through a ``ContainerMAB`` and record the rewards.

    One arm is created per key of the categories hierarchy and 24 bins are
    requested (the bin passed on every call is the event's hour). For each
    event the bandit selects an arm for that hour, the selected category is
    scored against the event's venue category, and the bandit is updated
    with the resulting reward.

    Args:
        events: table exposing ``shape`` and ``iterrows()`` (presumably a
            pandas DataFrame -- confirm against the caller). Every row must
            provide ``'datetime'`` (an object with an ``hour`` attribute)
            and ``'venue_category_id'``.

    Returns:
        A tuple ``(mab, stats)``: the updated bandit and a dict with the
        keys ``'n_trials'``, ``'pulled_arms'``, ``'rewards'`` and
        ``'cumulative_rewards'`` (the three lists have one entry per event).
    """
    # Load Categories Hierarchy
    print('Loading Categories Hierarchy %s' % datetime.now())
    h = ch.load()
    # Materialise the keys so they can be indexed by arm number even when
    # keys() returns a view rather than a list.
    categories = list(h.keys())
    mab = ContainerMAB(n_arms=len(categories), n_bins=24)

    n_trials = events.shape[0]
    returned_cate = [None] * n_trials
    rewards = [0.0] * n_trials
    cumulative_rewards = [0.0] * n_trials

    progress = n_trials
    # Report roughly every 10% of the events. The step is clamped to 1 so
    # that fewer than 10 events do not cause a modulo-by-zero.
    report_every = max(progress // 10, 1)
    running_total = 0
    for cnt, (r_ind, r) in enumerate(events.iterrows()):
        if cnt % report_every == 0:
            print('    Progress: %s %% %s'
                  % (cnt * 100 // progress, datetime.now()))

        t = r['datetime']
        cate_idx = mab.select_arms(int(t.hour))
        category = categories[cate_idx]

        returned_cate[cnt] = cate_idx

        # Compute Reward
        v_category = r['venue_category_id']
        if v_category in h:
            distance = h.dist_to_LCA(category, v_category, 0)
            # Divide by 3.0 to force true division: if dist_to_LCA returns
            # an int, Python 2 would floor the quotient before squaring.
            reward = 1 - (distance / 3.0) ** 2
        else:
            # Venue category unknown to the hierarchy: no reward.
            reward = 0
        mab.update(t.hour, cate_idx, reward)

        # Compute Cumulative Reward
        running_total = running_total + reward
        rewards[cnt] = reward
        cumulative_rewards[cnt] = running_total

    stats = {
        'n_trials': n_trials,
        'pulled_arms': returned_cate,
        'rewards': rewards,
        'cumulative_rewards': cumulative_rewards,
    }
    return mab, stats
Exemple #4
0
 def __init__(self, usr_loc_hist):
     """Derive the lookup structures from a location history, then train.

     ``usr_loc_hist`` is indexed by the column labels ``'user_id'``,
     ``'venue_id'`` and ``'venue_category_id'`` and must offer ``groupby``
     (presumably a pandas DataFrame -- confirm against the caller).
     """
     # categories hierarchy
     self.ch = hc.load()

     user_ids = usr_loc_hist['user_id']
     # count of distinct users present in the history
     self.n_users = len(user_ids.unique())
     # cross-tabulation of users against venues
     self.uv_cnt = pd.crosstab(user_ids, usr_loc_hist['venue_id'])
     # the history itself, grouped by venue category id
     self.usr_loc_hist = usr_loc_hist.groupby(['venue_category_id'])

     # Start empty: venues in categories, user's preference weight and
     # category experts (presumably filled in by offline() -- confirm).
     self.venues, self.ucw, self.experts = {}, {}, {}

     # Train
     self.offline()
Exemple #5
0
 def __init__(self, usr_loc_hist):
     """Set up per-user and per-category state and run offline training.

     The argument is read through the column labels ``'user_id'``,
     ``'venue_id'`` and ``'venue_category_id'`` and is grouped with
     ``groupby`` (presumably a pandas DataFrame -- confirm with callers).
     """
     history = usr_loc_hist

     # hierarchy of categories
     self.ch = hc.load()
     # distinct users seen in the history
     distinct_users = history['user_id'].unique()
     self.n_users = len(distinct_users)
     # user-by-venue count table
     self.uv_cnt = pd.crosstab(
         history['user_id'],
         history['venue_id'],
     )
     # history grouped on the venue category id
     group_keys = ['venue_category_id']
     self.usr_loc_hist = history.groupby(group_keys)
     # venues in categories
     self.venues = dict()
     # user's preference weight
     self.ucw = dict()
     # category experts
     self.experts = dict()
     # kick off training
     self.offline()
Exemple #6
0
import pickle
import pandas as pd
import categories_hierarchy as hc
import json

# Phoenix_rating_hist,LV_rating_hist = pickle.load(open('user_rating_hist_after_filter.p','rb'))
# Categories hierarchy from the project-local categories_hierarchy module.
hierarchy = hc.load()
# Business records; the handle stays open because the loop further down
# reads it line by line, parsing each line as JSON.
file = open('/Users/xiwang/git-code/Dataset/poi_data/yelp_dataset_round11/business.json', 'r')

# Map every category title to its alias. The with-statement closes the file
# on exit, so no explicit close() is needed afterwards.
with open('./data/categories.json', 'rb') as f:
    categories = json.load(f)
category_alias_map = {entry['title']: entry['alias'] for entry in categories}

# Accumulators for the pass over the business file below.
location_category = {}
unmatched_cat = set()
count_business = 0
count_no_cat_business = 0
for line in file.readlines():
    location = json.loads(line)
    min_level = 10
    count_business += 1
    if len(location['categories']) > 0:
        general_cat = ' '
        for category in location['categories']:
            if category in category_alias_map and hierarchy.get_level_num(category_alias_map[category]) < min_level:
                general_cat = category_alias_map[category]
            else:
                # print('No Match Category Exception: ', category)