def bitmovin(request):
    """Render the bitmovin index page for the current user.

    Args:
        request: Django HttpRequest; ``request.user`` identifies the viewer.

    Returns:
        HttpResponse with the rendered "bitmovin/index.html" template.
    """
    template = loader.get_template("bitmovin/index.html")
    uname = str(request.user)
    context = {
        'username': uname,
    }
    # Fix: Python 2 print statement -> print() call (the rest of this
    # codebase already uses the function form; this also keeps the file
    # importable under Python 3).
    print(get_author(uname)[0][0])
    return HttpResponse(template.render(context, request))
def get_user_item_feature(user, target, example_age):
    """Build the per-(user, item) feature vector fed to the scorer model.

    Features: [follows-author flag, age, age^2, sqrt(age)].

    Args:
        user: user id; key into the cached following list.
        target: article id; its author is resolved via ``get_author``.
        example_age: non-negative float age of the example.

    Returns:
        np.ndarray of shape (4,).
    """
    following_list = load_data('following_list')
    # Robustness: a user missing from following_list previously raised
    # KeyError; treat them as following nobody, consistent with the
    # `if user in following_list` guard used elsewhere in this codebase.
    followed = following_list.get(user, ())
    subscript = 1.0 if get_author(target) in followed else 0.0
    return np.array([
        subscript,
        example_age,
        math.pow(example_age, 2),
        math.sqrt(example_age),
    ])
def main():
    """Build and pickle the author -> set(article_id) lookup cache."""
    print('generate cache for author_to_articles...')
    metadata = load_data('metadata.json')
    # Group every known article id under its author.
    author_to_articles = defaultdict(set)
    for article_id in metadata:
        author_to_articles[get_author(article_id)].add(article_id)
    # Persist as a plain dict so unpickling needs no defaultdict factory.
    with open(cache_path('author_to_articles.pickle'), 'wb') as f:
        pickle.dump(dict(author_to_articles), f)
def main():
    """Build and pickle the author -> dense-integer-id mapping cache."""
    print('generate cache for author_to_id ...')
    article_to_id = load_data('article_to_id')
    # Reserve the special PAD/UNK slots first; real authors follow.
    author_to_id = {TOKEN_PAD: VALUE_PAD, TOKEN_UNK: VALUE_UNK}
    for article in article_to_id:
        # setdefault evaluates len() before inserting, so each new author
        # gets the next consecutive id.
        author_to_id.setdefault(get_author(article), len(author_to_id))
    with open(cache_path('author_to_id.pickle'), 'wb') as f:
        pickle.dump(author_to_id, f)
    print('author_to_id size = {}'.format(len(author_to_id)))
def check_licences(request):
    """Return a play-permission query fragment for the requesting user."""
    uname = str(request.GET.get('user'))
    licensed = get_author(uname)[0][0]
    return 'play=true' if licensed else 'play=false'
def main():
    """Score candidate articles per user and write top-N recommendations.

    Pipeline: load the trained user/item/scorer models, build each user's
    candidate pool (popular + fresh articles, similar users' reads, items
    similar to recent reads, followed authors' articles), score the pool,
    and write one "user a1 a2 ..." line per user.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', required=True)
    parser.add_argument('--user_set', default='dev.users')
    parser.add_argument('--basetime', default='2019022200')
    parser.add_argument('--output_path', default='', type=str)
    # Fix: without type=int a CLI-supplied --topn arrives as str, which
    # breaks the `[:topn]` slice and `len(recommends) == topn` below.
    parser.add_argument('--topn', default=100, type=int)
    conf = parser.parse_args()
    topn = conf.topn

    model_dir = os.path.dirname(conf.model_path)
    load_data('article_to_id',
              target_path=os.path.join(model_dir, 'article_to_id.pickle'))
    with open(os.path.join(model_dir, 'config.json'), "r") as f:
        conf_dict = json.load(f)
    user_model, item_model, scorer_model = get_model(
        num_article=conf_dict['num_article'],
        num_magazine=conf_dict['num_magazine'],
        num_search_keyword=conf_dict['num_search_keyword'],
        negative_sample_size=1,
        article_embedding_matrix=None,
        head_num=conf_dict['head_num'],
        transformer_num=conf_dict['transformer_num'],
        feed_forward_dim=conf_dict['feed_forward_dim'],
        dropout_rate=conf_dict['dropout_rate'],
        lr=conf_dict['lr'],
        decay_rate=conf_dict['decay_rate'],
        inference=True,
        weight_path=conf.model_path)

    # Fix: this load was commented out, yet `metadata` is used below
    # (total_items, get_new_article, the `cand not in metadata` filter) —
    # a NameError unless a module-level `metadata` happens to exist.
    metadata = load_data('metadata.json')
    total_items = np.array(list(metadata.keys()))
    seens_total = load_data('seens')
    user_list = load_data(conf.user_set)

    print('extract user embedding')
    user_embeddings, user_index = get_user_embeddings(
        user_model, seens_total, user_list)
    print('extract item embedding')
    item_embeddings, item_to_id, id_to_item = get_item_embeddings(
        item_model, total_items)
    item_index = gen_item_index(user_model)

    queue = Queue(maxsize=1)
    following_list = load_data('following_list')
    author_to_articles = load_data('author_to_articles')
    most_view_items = most_popular(500, conf.basetime)
    new_items = get_new_article(metadata, '2019020100')
    default_targets = [
        item_to_id[a] for a in set(most_view_items + new_items)
        if a in item_to_id
    ]

    def gen_feature():
        """Producer thread: build scorer inputs per user and enqueue them."""
        for user in user_list:
            if user_embeddings[user] is None:
                # Cold user: consumer falls back to popularity heuristics.
                queue.put((user, None, None))
                continue
            targets = set(default_targets)
            # Articles read by users with similar behaviour patterns.
            # NOTE(review): user_index(100) takes no user argument — confirm
            # the index callable is already bound to the current user.
            for target_user in user_index(100):
                if target_user not in seens_total:
                    continue
                for article in seens_total[target_user]['articles']:
                    if article in item_to_id:
                        targets.add(item_to_id[article])
            # Articles similar to the user's ~20 most recent reads.
            for article in seens_total[user]['articles'][
                    max(0, len(seens_total[user]['articles']) - 20):]:
                for target_article in item_index(article, 100):
                    if target_article in item_to_id:
                        targets.add(item_to_id[target_article])
            # Articles written by authors the user follows.
            if user in following_list:
                for author in following_list[user]:
                    for a in author_to_articles.get(author, set()):
                        if a in item_to_id:
                            targets.add(item_to_id[a])
            targets = list(targets)
            user_item_inputs = np.zeros((len(targets), 4))
            for i, target in enumerate(targets):
                user_item_inputs[i] = get_user_item_feature(
                    user, id_to_item[target], 1.01)
            inputs = [
                np.array([user_embeddings[user]] * len(targets)),
                item_embeddings[targets],
                user_item_inputs
            ]
            queue.put((user, inputs, [id_to_item[i] for i in targets]))

    worker = threading.Thread(target=gen_feature)
    worker.start()

    outputs = OrderedDict()
    for _ in trange(len(user_list)):
        user, inputs, targets = queue.get()
        if inputs is None:
            # Cold user: followed authors' popular articles first, then
            # global popularity, de-duplicated in insertion order.
            candidates = OrderedDict()
            if user in following_list:
                for article in most_view_items:
                    if get_author(article) in following_list[user]:
                        if article not in candidates:
                            candidates[article] = True
            for article in most_view_items:
                if article not in candidates:
                    candidates[article] = True
            outputs[user] = list(candidates.keys())[:topn]
        else:
            score_matrix = scorer_model.predict(inputs,
                                                batch_size=len(inputs[0]))
            ranks = np.argsort(-score_matrix[:, 0])
            recommends = []
            # set() makes each membership probe O(1); the original scanned
            # the seen-articles list per candidate.
            seens_set = set(seens_total[user]['articles'])
            for cand in [targets[idx] for idx in ranks]:
                if cand in seens_set or cand not in metadata:
                    continue
                recommends.append(cand)
                if len(recommends) == topn:
                    break
            outputs[user] = recommends
    worker.join()

    if conf.output_path:
        recommend_path = conf.output_path
    else:
        recommend_path = cache_path('recommend.txt')
    with open(recommend_path, 'w', encoding='utf-8') as f:
        for user, recommends in outputs.items():
            f.write("{} {}\n".format(user, " ".join(recommends)))
    print('EntDiv@%s: %s' % (topn, entropy_diversity(outputs, topn)))
def get_sequential_feature(user, articles, ages, mask_rate=0.10,
                           mask_mask_rate=0.8, random_sample_length=False,
                           random_range=False, minimum_len=1,
                           data_type='test', positive=True):
    """Build padded model input sequences (and target) for one user history.

    Args:
        user: user id, used for the "follows the author" feature.
        articles: chronological list of article ids the user read.
        ages: example ages aligned with ``articles``.
        mask_rate: in 'train' mode, probability an item is masked
            (BERT-style): with prob ``mask_mask_rate`` replaced by
            TOKEN_UNK, otherwise by a random article.
        random_sample_length / random_range / minimum_len: training-time
            sub-sequence sampling controls.
        data_type: 'train' (reserve last 2 items as targets), 'valid'
            (reserve last 1) or 'test' (use everything; no target).
        positive: when a target exists, True takes the true next article,
            False samples a random article the user never saw.

    Returns:
        (article_sequence, magazine_sequence, author_sequence,
         feature_sequence, target_age, target); sequences are left-padded
        to MAX_USER_SEQUENCE_LEN; target is None for 'test'.
    """
    metadata = load_data('metadata.json')
    metadata_articles = load_data('metadata_articles')
    article_to_id = load_data('article_to_id')
    magazine_to_id = load_data('magazine_to_id')
    following_list = load_data('following_list')
    author_to_id = load_data('author_to_id')

    # Usable history length: train/valid hold out trailing items as targets.
    if data_type == 'train':
        orig_len = len(articles) - 2
    elif data_type == 'valid':
        orig_len = len(articles) - 1
    else:
        orig_len = len(articles)

    cur_len = min(MAX_USER_SEQUENCE_LEN, orig_len)
    if random_sample_length and cur_len > minimum_len:
        cur_len = random.randrange(minimum_len, cur_len + 1)
    rem_len = MAX_USER_SEQUENCE_LEN - cur_len

    if random_range:
        bidx = random.randrange(0, orig_len - cur_len + 1)
    else:
        bidx = orig_len - cur_len
    eidx = bidx + cur_len

    item_size = len(metadata)
    # Robustness: a user missing from following_list previously raised
    # KeyError; treat them as following nobody, consistent with the
    # `if user in following_list` guard used elsewhere in this codebase.
    followed = following_list.get(user, ())

    article_input = []
    magazine_input = []
    feature_input = []
    author_input = []
    for article, example_age in zip(articles[bidx:eidx], ages[bidx:eidx]):
        if data_type == 'train' and np.random.random() < mask_rate:
            # BERT-style masking: usually UNK, occasionally a random item.
            if np.random.random() < mask_mask_rate:
                current = TOKEN_UNK
            else:
                current = metadata_articles[random.randrange(0, item_size)]
        else:
            current = article
        article_input.append(article_to_id.get(current, VALUE_UNK))
        if current in metadata:
            magazine_input.append(
                magazine_to_id.get(metadata[current]['magazine_id'],
                                   VALUE_UNK))
            article_age = metadata[current]['age']
        else:
            magazine_input.append(VALUE_UNK)
            article_age = 0
        # Fix: author_to_id is keyed by author (the cache builder inserts
        # get_author(article) as keys), so looking up the raw article id
        # always yielded VALUE_UNK.
        author_input.append(author_to_id.get(get_author(current), VALUE_UNK))
        subscript = 1.0 if get_author(current) in followed else 0.0
        feature_input.append([
            subscript,
            example_age,
            math.pow(example_age, 2),
            math.sqrt(example_age),
            article_age,
            math.pow(article_age, 2),
            math.sqrt(article_age),
        ])

    if data_type == 'test':
        target = None
        target_age = 1.001
    else:
        target_age = ages[eidx]
        if positive:
            target = articles[eidx]
        else:
            # Negative sampling: redraw until the article is unseen.
            user_seen = set(articles)
            while True:
                target = metadata_articles[random.randrange(0, item_size)]
                if target not in user_seen:
                    break

    # Left-pad every sequence to the fixed model input length.
    article_sequence = [VALUE_PAD] * rem_len + article_input
    magazine_sequence = [VALUE_PAD] * rem_len + magazine_input
    author_sequence = [VALUE_PAD] * rem_len + author_input
    feature_sequence = [[0, 0, 0, 0, 0, 0, 0]] * rem_len + feature_input
    return (article_sequence, magazine_sequence, author_sequence,
            feature_sequence, target_age, target)