コード例 #1
0
ファイル: test_user_knn.py プロジェクト: shafiahmed/flurs
 def test_score(self):
     """After one observed event, the score for the seen pair lies in [-1, 1]."""
     rec = self.recommender
     rec.register(User(0))
     rec.register(Item(0))
     rec.update(Event(User(0), Item(0), 1))
     s = rec.score(User(0), np.array([0]))
     print(s)
     self.assertTrue(-1. <= s <= 1.)
コード例 #2
0
ファイル: test_popular.py プロジェクト: hatanakatakashi/flurs
 def test_update(self):
     """One event leaves one user, one item, and a unit frequency count."""
     rec = self.recommender
     rec.add_user(User(0))
     rec.add_item(Item(0))
     rec.update(Event(User(0), Item(0), 1))
     self.assertEqual(rec.n_user, 1)
     self.assertEqual(rec.n_item, 1)
     assert_array_equal(rec.freq, np.array([1]))
コード例 #3
0
    def setUp(self):
        """Prepare a Popular recommender wrapped in a non-repeating Evaluator."""
        rec = Popular()
        rec.init_recommender()
        self.evaluator = Evaluator(rec, can_repeat=False)

        # (user, item) interaction pairs; every sample is a positive event
        pairs = [(0, 0), (0, 1), (1, 2), (0, 3), (2, 4),
                 (1, 4), (0, 5), (2, 1), (0, 6), (2, 0)]
        self.samples = [Event(User(u), Item(i), 1) for u, i in pairs]
コード例 #4
0
ファイル: recommender.py プロジェクト: zjw0358/flurs
async def process(stream):
    """Consume a JSON-encoded event stream and update the recommender.

    Each stream element is a JSON object with 'user', 'item' and 'rating'
    keys (1-based ids). Events rated below 3 are skipped; for the rest,
    current recommendations are printed and the model is updated.
    """
    async for raw in stream:
        ev = json.loads(raw)
        if ev['rating'] < 3:
            # only ratings >= 3 are treated as positive feedback
            continue
        user = User(ev['user'] - 1)
        item = Item(ev['item'] - 1)
        print(recommender.recommend(user, np.arange(0, n_item)))
        recommender.update(Event(user, item))
コード例 #5
0
    def convert(self):
        """Create a list of samples and count number of users/items.

        Reads click records (ad_id, birth year, geo code, sex) from
        ``../data/click.tsv`` and turns each click into a positive
        ``Event`` whose user vector is (normalized age, sex, geo one-hot)
        and whose item vector is the ad's category one-hot.
        """
        # each line is a tab-separated row of integers
        with open(os.path.join(os.path.dirname(__file__),
                               '../data/click.tsv')) as f:
            clicks = [list(map(int, line.rstrip().split('\t'))) for line in f]

        self.samples = []

        u_index = 0  # each sample indicates different visitors
        n_geo = 50  # 50 states in US

        # ad_id -> dense item index, in first-seen order; the dict lookup
        # replaces the original O(n) ad_ids.index() scan per click
        ad_index = {}
        ad_categories = []

        for ad_id, year, geo, sex in clicks:
            if ad_id not in ad_index:
                ad_index[ad_id] = len(ad_index)
                ad_categories.append(self.categories[ad_id])
            i_index = ad_index[ad_id]

            geo_vec = np.zeros(n_geo)
            geo_vec[geo - 1] = 1.

            # normalized age in [0, 1]
            # clickgenerator.jl generates a birth year in [1930, 2000]
            age = 1. - ((2000 - year) / 70.)

            user = User(
                0, np.concatenate((np.array([age]), np.array([sex]), geo_vec)))

            # category one-hot (3 possible ad categories)
            category = np.zeros(3)
            category[ad_categories[i_index]] = 1

            item = Item(i_index, category)

            self.samples.append(Event(user, item, 1.))

            u_index += 1

        self.n_user = u_index
        self.n_item = 5  # 5 ad variants
        self.n_sample = len(self.samples)
        # 20% for pre-training to avoid cold-start,
        # 10% for evaluation of pre-training
        self.n_batch_train = int(self.n_sample * 0.2)
        self.n_batch_test = int(self.n_sample * 0.1)
        self.n_test = self.n_sample - (self.n_batch_train + self.n_batch_test)
コード例 #6
0
def recommend_service(chosen):
    """Recommend movies for a user given a list of chosen (liked) item ids.

    Loads the persisted recommender state from pickle files, registers a
    fresh user, replays one positive event per chosen item, re-persists
    the updated state, and prints the top-10 recommended movie titles.
    """
    evaluator = pickle.load(open('evaluator.pckl', 'rb'))
    user_id = 273882713
    last = pickle.load(open('last.pckl', 'rb'))
    tfidfs = pickle.load(open('movies.pckl', 'rb'))
    item_ids_keyed = pickle.load(open('item_ids.pckl', 'rb'))
    # 70% incremental evaluation and updating
    logging.info('incrementally predict, evaluate and update the recommender')
    movie_names = pickle.load(open('movies_names.pckl', 'rb'))

    # new user with an empty feature vector, appended after existing users
    user = User(len(evaluator.rec.users), np.zeros(0))
    if evaluator.rec.is_new_user(user.index):
        evaluator.rec.register_user(user)

    ordered_item_ids = list(item_ids_keyed)
    items = []
    for raw_id in chosen:
        idx = ordered_item_ids.index(int(raw_id))
        movie = Item(idx, tfidfs[int(raw_id)])
        if evaluator.rec.is_new_item(movie.index):
            evaluator.rec.register_item(movie)
        items.append(movie)

    events = []

    # context: one-hot day of the week for "now"
    now = datetime.now()
    weekday_vec = np.zeros(7)
    weekday_vec[now.weekday()] = 1

    # features of the user's previously rated item (zeros on cold start)
    if user_id in last:
        last_item_vec = last[user_id]['item']
        last_weekday_vec = last[user_id]['weekday']
    else:
        last_item_vec = np.zeros(49)
        last_weekday_vec = np.zeros(7)

    for movie in items:
        context = np.concatenate((weekday_vec, last_item_vec, last_weekday_vec))
        events.append(Event(user, movie, 1, context))
        last[user_id] = {'item': movie.feature, 'weekday': weekday_vec}

    for ev in events:
        evaluator.rec.update(ev)

    # Re save pickles
    pickle.dump(evaluator, open('evaluator.pckl', 'wb'))
    pickle.dump(last, open('last.pckl', 'wb'))

    candidates = list(set(evaluator.item_buffer))
    recommendations = evaluator.rec.recommend(
        user, np.array(candidates), [0 for x in range(0, 63)])
    # recommendations[0] is sorted ascending; take and print the top 10
    for top in reversed(recommendations[0][-10:]):
        print(movie_names[list(item_ids_keyed)[top]])
コード例 #7
0
    def convert(self):
        """Create a list of samples and count number of users/items.

        Converts each Last.fm play record into a positive ``Event`` with a
        (age, gender, country one-hot) user vector and an artist one-hot
        item vector; the play time is the event context.
        """
        path = os.path.join(os.path.dirname(__file__), '../data/lastfm.tsv')
        df_lastfm = pd.read_csv(path, delimiter='\t')

        self.samples = []
        self.dts = []

        # number of artists will be dimension of item contexts
        n_artist = len(set(df_lastfm['artist_index']))
        self.contexts['item'] = n_artist

        countries = list(set(df_lastfm['country']))
        n_country = len(countries)  # 16 in total
        d_country = dict(zip(countries, range(n_country)))

        for _, row in df_lastfm.iterrows():
            country_vec = np.zeros(n_country)
            country_vec[d_country[row['country']]] = 1.

            user_feature = np.concatenate((np.array([row['age']]),
                                           np.array([row['gender']]),
                                           country_vec))
            user = User(row['u_index'], user_feature)

            artist_vec = np.zeros(n_artist)
            artist_vec[row['artist_index']] = 1
            item = Item(row['i_index'], artist_vec)

            self.samples.append(Event(user, item, 1., np.array([row['time']])))
            self.dts.append(row['dt'])

        self.n_user = len(set(df_lastfm['userid']))
        self.n_item = len(set(df_lastfm['track-id']))
        self.n_sample = len(self.samples)
        # 20% for pre-training to avoid cold-start,
        # 10% for evaluation of pre-training
        self.n_batch_train = int(self.n_sample * 0.2)
        self.n_batch_test = int(self.n_sample * 0.1)
        self.n_test = self.n_sample - (self.n_batch_train + self.n_batch_test)
コード例 #8
0
ファイル: test_sketch.py プロジェクト: hatanakatakashi/flurs
 def test_score(self):
     """A single observed event should yield a score within [0, 1]."""
     rec = self.recommender
     rec.add_user(User(0))
     rec.add_item(Item(0))
     rec.update(Event(User(0), Item(0), 1))
     s = rec.score(User(0), np.array([0]), np.array([0]))
     self.assertTrue(0. <= s <= 1.0)
コード例 #9
0
ファイル: test_sketch.py プロジェクト: hatanakatakashi/flurs
 def test_update(self):
     """Registering one user/item plus one event sets both counters to 1."""
     rec = self.recommender
     rec.add_user(User(0))
     rec.add_item(Item(0))
     rec.update(Event(User(0), Item(0), 1))
     self.assertEqual(rec.n_user, 1)
     self.assertEqual(rec.n_item, 1)
コード例 #10
0
ファイル: test_popular.py プロジェクト: shafiahmed/flurs
 def test_score(self):
     """After one positive event, the popularity score of the item is 1."""
     rec = self.recommender
     rec.register(User(0))
     rec.register(Item(0))
     rec.update(Event(User(0), Item(0), 1))
     self.assertEqual(rec.score(User(0), np.array([0])), 1)
コード例 #11
0
ファイル: recommend.py プロジェクト: Taitai6521/kag-ref
# Remap raw user/item ids to dense, first-seen indices while feeding
# every interaction into the recommender, then persist the model.
map_user = {}
map_item = {}
user_idx = 0
item_idx = 0

print('load')
for user, item in tqdm(df):
    # assign the next dense index to unseen raw ids
    if user not in map_user:
        map_user[user] = user_idx
        user_idx += 1
    if item not in map_item:
        map_item[item] = item_idx
        item_idx += 1

    user = User(map_user[user])
    item = Item(map_item[item])
    recommender.add_user(user)
    recommender.add_item(item)
    recommender.update(Event(user, item, 1))

with open('recommend.pkl', 'wb') as f:
    pickle.dump(recommender, f, -1)
# specify target user and list of item candidates
#recommender.recommend(user, [0])
# => (sorted candidates, scores)
コード例 #12
0
    def convert(self):
        """Create a list of samples and count number of users/items.

        Each rating becomes a positive ``Event`` whose context vector
        concatenates the day-of-week of the rating, the genre vector of
        the user's previously rated movie, and that previous rating's
        day-of-week (zeros when there is no previous rating).
        """
        self.__load_ratings()

        users = self.__load_users()
        movies, _ = self.__load_movies()

        # raw id -> dense index in first-seen order; dict lookups keep this
        # O(1) per rating instead of the quadratic list.index() scans
        user_indices = {}
        item_indices = {}

        self.samples = []

        head_date = datetime(*time.localtime(self.ratings[0, 3])[:6])
        self.dts = []

        last = {}  # per-user features of the previously rated movie

        for user_id, item_id, rating, timestamp in self.ratings:
            # give an unique user/item index
            u_index = user_indices.setdefault(user_id, len(user_indices))
            i_index = item_indices.setdefault(item_id, len(item_indices))

            # delta days since the first rating in the dataset
            date = datetime(*time.localtime(timestamp)[:6])
            self.dts.append(self.__delta(head_date, date))

            weekday_vec = np.zeros(7)
            weekday_vec[date.weekday()] = 1

            if user_id in last:
                last_item_vec = last[user_id]['item']
                last_weekday_vec = last[user_id]['weekday']
            else:
                # cold start: 18 movie genres + 7 weekdays, all zero
                last_item_vec = np.zeros(18)
                last_weekday_vec = np.zeros(7)

            others = np.concatenate(
                (weekday_vec, last_item_vec, last_weekday_vec))

            user = User(u_index, users[user_id])
            item = Item(i_index, movies[item_id])

            self.samples.append(Event(user, item, 1., others))

            # record users' last rated movie features
            last[user_id] = {'item': movies[item_id], 'weekday': weekday_vec}

        self.n_user = len(user_indices)
        self.n_item = len(item_indices)
        self.n_sample = len(self.samples)
        # 20% for pre-training to avoid cold-start,
        # 10% for evaluation of pre-training
        self.n_batch_train = int(self.n_sample * 0.2)
        self.n_batch_test = int(self.n_sample * 0.1)
        self.n_test = self.n_sample - (self.n_batch_train + self.n_batch_test)
コード例 #13
0
]
# NOTE(review): the bracket above closes `match_lst` (defined before this
# excerpt). The loop below replays the user's seen movies whose titles
# contain one of those keywords as positive events for a new user.
new_u = h.create_user()

for i, mid in enumerate(h.seen_movies):
    name = h.movie_id2name(mid)
    # keep only titles containing a match_lst keyword (case-insensitive)
    match = False
    for item in match_lst:
        if item.lower() in name.lower():
            match = True
            break
    if not match:
        continue
    print(name)
    print(mid)

    print(h.item_ids[mid])
    # 27-dim context with only slot 4 set — presumably a fixed genre/day
    # flag; TODO confirm against the dataset's context layout
    ctx = np.zeros(27)
    ctx[4] = 1
    mov = h.get_movie(mid)
    evt = Event(new_u, mov, context=ctx)
    rec.update(evt)

# NOTE(review): `ctx` below is whatever the last loop iteration left
# behind; if no movie matched, this raises NameError — confirm intended.
print('===== Recommending =====')
cand = rec.recommend(new_u, np.array(sorted(list(h.item_ids.values()))), ctx)
for c in cand[0][:10]:
    c = int(c)
    print(c)
    # print(h.movie_id2name(c))
    print(h.movie_id2name(h.rev_item_ids[c]))
# print(dir(new_u))
コード例 #14
0
    def fetch_movielens(self):
        """Load MovieLens ratings and convert them into contextual samples.

        Returns a ``Bunch`` with the event samples, context dimensions and
        user/item/sample counts; also records the raw-id -> dense-index
        mappings (and the reverse item mapping) on ``self``.
        """
        self.seen_movies = set()
        print('Getting ratings...')
        users, ratings = self.load_ratings()
        print('Getting movies...')
        movies = self.load_movies()

        samples = []

        user_ids = {}  # raw user id -> dense index (first-seen order)
        item_ids = {}  # raw item id -> dense index (first-seen order)

        head_date = datetime(*time.localtime(ratings[0, 3])[:6])
        dts = []  # delta days since the first rating

        last = {}  # per-user genre vector of the previously rated movie

        print('Processing ratings...')
        for user_id, item_id, rating, timestamp in tqdm(ratings):
            # Remap raw ids to dense indices
            u_index = user_ids.setdefault(user_id, len(user_ids))
            i_index = item_ids.setdefault(item_id, len(item_ids))
            self.seen_movies.add(item_id)

            date = datetime(*time.localtime(timestamp)[:6])
            dts.append(delta(head_date, date))

            weekday_vec = np.zeros(7)
            weekday_vec[date.weekday()] = 1

            # genre vector of the user's previous movie (zeros on cold start)
            last_item_vec = last.get(user_id, np.zeros(20))

            others = np.concatenate((weekday_vec, last_item_vec))

            # Dummy feature to prevent errors
            user = User(u_index, np.zeros(1))
            item = Item(i_index, movies[item_id])

            samples.append(Event(user, item, 1., others))

            last[user_id] = movies[item_id]

        self.user_ids = user_ids
        self.item_ids = item_ids
        self.rev_item_ids = {y: x for (x, y) in item_ids.items()}
        print('Done loading!')

        return Bunch(
            samples=samples,
            can_repeat=False,
            # 7 days of the week + 20 genres
            # Dummy feature for user
            contexts={
                'others': 7 + 20,
                'item': 20,
                'user': 1
            },
            n_user=len(user_ids),
            n_item=len(item_ids),
            n_sample=len(samples))
コード例 #15
0
def fetch_movielens(data_home=None, size='100k'):
    """Load MovieLens ratings/movies and convert them into event samples.

    Parameters
    ----------
    data_home : str
        Path to the raw MovieLens data (required).
    size : str
        Dataset variant (e.g. '100k').

    Returns
    -------
    Bunch
        Event samples, context dimensionalities and user/item/sample counts.

    Side effects: caches parsed ratings/movies, and persists the last-rated
    features and the id -> index mappings, as pickle files in the CWD.
    """
    assert data_home is not None

    # cache the parsed ratings so repeated runs skip the raw-file parse
    print('Loading ratings.')
    try:
        ratings = pickle.load(open('ratings.pkl', 'rb'))
    except FileNotFoundError:
        ratings = load_ratings(data_home, size)
        with open('ratings.pkl', 'wb') as f:
            pickle.dump(ratings, f)

    print('Loading movies.')
    try:
        movies = pickle.load(open('movies.pkl', 'rb'))
    except FileNotFoundError:
        movies = load_movies(data_home, size)
        with open('movies.pkl', 'wb') as f:
            pickle.dump(movies, f)

    samples = []

    head_date = datetime(*time.localtime(int(ratings[0, 3]))[:6])
    dts = []  # delta days since the first rating
    user_ids_keyed = {}  # raw user id -> dense index (first-seen order)
    item_ids_keyed = {}  # raw item id -> dense index (first-seen order)
    last = {}  # per-user features of the previously rated movie

    print('creating dataset')
    for i, (user_id, item_id, rating, timestamp) in enumerate(ratings, 1):
        item_id = int(item_id)

        # give an unique user/item index (O(1) dict lookups)
        u_index = user_ids_keyed.setdefault(user_id, len(user_ids_keyed))
        i_index = item_ids_keyed.setdefault(item_id, len(item_ids_keyed))

        # delta days
        date = datetime(*time.localtime(int(timestamp))[:6])
        dts.append(delta(head_date, date))

        weekday_vec = np.zeros(7)
        weekday_vec[date.weekday()] = 1

        if user_id in last:
            last_item_vec = last[user_id]['item']
            last_weekday_vec = last[user_id]['weekday']
        else:
            # cold start: 49-dim item features + 7 weekdays, all zero
            last_item_vec = np.zeros(49)
            last_weekday_vec = np.zeros(7)

        others = np.concatenate((weekday_vec, last_item_vec, last_weekday_vec))
        user = User(u_index, np.zeros(0))  # users carry no static features
        item = Item(i_index, movies[item_id])
        samples.append(Event(user, item, 1., others))

        # progress logging
        if i % 100000 == 0:
            print(i)
            print(len(item_ids_keyed))

        # record users' last rated movie features
        last[user_id] = {'item': movies[item_id], 'weekday': weekday_vec}

    pickle.dump(last, open('last.pckl', 'wb'))

    with open('item_ids.pkl', 'wb') as file:
        pickle.dump(item_ids_keyed, file)

    # BUG FIX: the original dumped the never-populated `user_ids` list here
    # (its appends were commented out); persist the actual mapping instead
    with open('user_ids.pkl', 'wb') as file:
        pickle.dump(user_ids_keyed, file)

    # contexts in this dataset
    # 1 delta time, 18 genres, and 23 demographics (1 for M/F, 1 for age, 21 for occupation(0-20))
    # 7 for day of week, 18 for the last rated item genres, 7 for the last day of week, 28 for tf-idf
    return Bunch(samples=samples,
                 can_repeat=False,
                 contexts={'others': 7 + 21 + 28 + 7, 'item': 49, 'user': 0},
                 n_user=len(user_ids_keyed),
                 n_item=len(item_ids_keyed),
                 n_sample=len(samples))