Ejemplo n.º 1
0
def bitmovin(request):
    """Render the bitmovin index page for the currently logged-in user.

    Args:
        request: Django HttpRequest; ``request.user`` supplies the username.

    Returns:
        HttpResponse with the rendered ``bitmovin/index.html`` template.
    """
    template = loader.get_template("bitmovin/index.html")
    uname = str(request.user)
    context = {
        'username': uname,
    }
    # BUG FIX: the original used a Python 2 print statement, which is a
    # SyntaxError under Python 3 — every other block in this file uses
    # print() calls.
    print(get_author(uname)[0][0])
    return HttpResponse(template.render(context, request))
def get_user_item_feature(user, target, example_age):
    """Build the 4-element (user, item) feature vector.

    Features: a follow flag (1.0 when the target's author is in the
    user's following list) plus the example age raised to the powers
    1, 2 and 1/2.
    """
    follows = load_data('following_list')
    follow_flag = 1.0 if get_author(target) in follows[user] else 0.0
    return np.array([
        follow_flag,
        example_age,
        math.pow(example_age, 2),
        math.sqrt(example_age),
    ])
Ejemplo n.º 3
0
def main():
    """Build and cache the author -> set-of-article-ids lookup.

    Reads the article metadata, groups article ids by author and pickles
    the resulting plain dict into the cache directory.
    """
    print('generate cache for author_to_articles...')

    metadata = load_data('metadata.json')
    # defaultdict(set): the class itself is the factory — no lambda needed.
    author_to_articles = defaultdict(set)
    for article_id in metadata:
        author_to_articles[get_author(article_id)].add(article_id)

    # Convert to a plain dict so unpickling does not depend on the factory.
    author_to_articles = dict(author_to_articles)
    with open(cache_path('author_to_articles.pickle'), 'wb') as f:
        pickle.dump(author_to_articles, f)
Ejemplo n.º 4
0
def main():
    """Build and cache the author -> integer-id mapping.

    Seeds the mapping with the PAD/UNK sentinel tokens, then assigns a
    fresh consecutive id to each distinct author of the known articles,
    and pickles the result.
    """
    print('generate cache for author_to_id ...')

    article_to_id = load_data('article_to_id')
    author_to_id = {TOKEN_PAD: VALUE_PAD, TOKEN_UNK: VALUE_UNK}
    for article in article_to_id:
        # setdefault evaluates len() before inserting, so each new author
        # receives the next consecutive id; existing keys are untouched.
        author_to_id.setdefault(get_author(article), len(author_to_id))

    with open(cache_path('author_to_id.pickle'), 'wb') as f:
        pickle.dump(author_to_id, f)
    print('author_to_id size = {}'.format(len(author_to_id)))
Ejemplo n.º 5
0
def check_licences(request):
    """Return 'play=true' or 'play=false' for the requesting user.

    The flag comes from the first element of the user's get_author()
    record; any truthy value enables playback.
    """
    uname = str(request.GET.get('user'))
    licensed = get_author(uname)[0][0]
    return 'play=true' if licensed else 'play=false'
Ejemplo n.º 6
0
def main():
    """Generate top-N article recommendations for every user in a user set.

    Loads a trained (user, item, scorer) model triple, builds a per-user
    candidate pool (popular + new articles, articles seen by similar
    users, articles similar to the user's recent reads, and articles by
    followed authors), scores the candidates, and writes one
    "user article article ..." line per user to the output path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', required=True)
    parser.add_argument('--user_set', default='dev.users')
    parser.add_argument('--basetime', default='2019022200')
    parser.add_argument('--output_path', default='', type=str)
    # BUG FIX: without type=int a CLI-supplied --topn arrived as a str, so
    # the `len(recommends) == topn` cutoff below could never be reached.
    parser.add_argument('--topn', default=100, type=int)
    conf = parser.parse_args()
    topn = conf.topn

    # Pre-warm the article_to_id cache from the model's own directory so
    # ids match the ones the model was trained with.
    load_data('article_to_id',
              target_path=os.path.join(os.path.dirname(conf.model_path),
                                       'article_to_id.pickle'))

    with open(os.path.join(os.path.dirname(conf.model_path), 'config.json'),
              "r") as f:
        conf_dict = json.load(f)

    user_model, item_model, scorer_model = get_model(
        num_article=conf_dict['num_article'],
        num_magazine=conf_dict['num_magazine'],
        num_search_keyword=conf_dict['num_search_keyword'],
        negative_sample_size=1,
        article_embedding_matrix=None,
        head_num=conf_dict['head_num'],
        transformer_num=conf_dict['transformer_num'],
        feed_forward_dim=conf_dict['feed_forward_dim'],
        dropout_rate=conf_dict['dropout_rate'],
        lr=conf_dict['lr'],
        decay_rate=conf_dict['decay_rate'],
        inference=True,
        weight_path=conf.model_path)

    metadata = load_data('metadata.json')
    total_items = np.array(list(metadata.keys()))

    seens_total = load_data('seens')
    user_list = load_data(conf.user_set)
    print('extract user embedding')
    user_embeddings, user_index = get_user_embeddings(user_model, seens_total,
                                                      user_list)
    print('extract item embedding')
    item_embeddings, item_to_id, id_to_item = get_item_embeddings(
        item_model, total_items)

    item_index = gen_item_index(user_model)

    # maxsize=1 keeps the producer thread at most one user ahead of the
    # scoring loop, bounding memory use.
    queue = Queue(maxsize=1)

    following_list = load_data('following_list')
    author_to_articles = load_data('author_to_articles')
    most_view_items = most_popular(500, conf.basetime)
    new_items = get_new_article(metadata, '2019020100')
    # Shared fallback candidates: popular + recent articles the model knows.
    default_targets = [
        item_to_id[a] for a in set(most_view_items + new_items)
        if a in item_to_id
    ]

    def gen_feature():
        # Producer: emits (user, model_inputs, candidate_articles) tuples.
        for user in user_list:
            if user_embeddings[user] is None:
                # No embedding (cold user) — signal the consumer to use
                # the popularity-based fallback.
                queue.put((user, None, None))
            else:
                targets = set(default_targets)

                # Articles viewed by users with a similar usage pattern.
                # NOTE(review): user_index comes from get_user_embeddings
                # and is called as user_index(100) — presumably a nearest-
                # neighbour lookup returning ~100 similar users; confirm.
                for target_user in user_index(100):
                    if target_user not in seens_total:
                        continue
                    for article in seens_total[target_user]['articles']:
                        if article in item_to_id:
                            targets.add(item_to_id[article])

                # Articles similar to the user's most recent reads
                # (the last 20 articles of the seen history).
                for article in seens_total[user]['articles'][
                        max(0,
                            len(seens_total[user]['articles']) - 20):]:
                    for target_article in item_index(article, 100):
                        if target_article in item_to_id:
                            targets.add(item_to_id[target_article])

                # Articles written by authors the user follows.
                if user in following_list:
                    for author in following_list[user]:
                        for a in author_to_articles.get(author, set()):
                            if a in item_to_id:
                                targets.add(item_to_id[a])

                targets = list(targets)
                user_item_inputs = np.zeros((len(targets), 4))
                for i, target in enumerate(targets):
                    user_item_feature = get_user_item_feature(
                        user, id_to_item[target], 1.01)
                    user_item_inputs[i] = user_item_feature
                inputs = [
                    np.array([user_embeddings[user]] * len(targets)),
                    item_embeddings[targets], user_item_inputs
                ]
                queue.put((user, inputs, [id_to_item[i] for i in targets]))

    worker = threading.Thread(target=gen_feature)
    worker.start()

    outputs = OrderedDict()
    for _ in trange(len(user_list)):
        user, inputs, targets = queue.get()
        if inputs is None:
            # Cold user: followed-author popular articles first, then the
            # remaining popular articles, truncated to topn.
            candidates = OrderedDict()
            if user in following_list:
                for article in most_view_items:
                    if get_author(article) in following_list[user]:
                        if article not in candidates:
                            candidates[article] = True
            for article in most_view_items:
                if article not in candidates:
                    candidates[article] = True
            outputs[user] = list(candidates.keys())[:topn]
        else:
            # Score all candidates in a single batch; rank descending.
            score_matrix = scorer_model.predict(inputs,
                                                batch_size=len(inputs[0]))
            ranks = np.argsort(-score_matrix[:, 0])
            recommends = []
            seens_set = seens_total[user]['articles']
            for cand in [targets[idx] for idx in ranks]:
                # Skip already-seen or unknown articles.
                if cand in seens_set or cand not in metadata:
                    continue
                recommends.append(cand)
                if len(recommends) == topn:
                    break
            outputs[user] = recommends

    worker.join()

    if conf.output_path:
        recommend_path = conf.output_path
    else:
        recommend_path = cache_path('recommend.txt')

    with open(recommend_path, 'w', encoding='utf-8') as f:
        for user, recommends in outputs.items():
            f.write("{} {}\n".format(user, " ".join(recommends)))

    print('EntDiv@%s: %s' % (topn, entropy_diversity(outputs, topn)))
def get_sequential_feature(user,
                           articles,
                           ages,
                           mask_rate=0.10,
                           mask_mask_rate=0.8,
                           random_sample_length=False,
                           random_range=False,
                           minimum_len=1,
                           data_type='test',
                           positive=True):
    """Build padded model input sequences from one user's reading history.

    Args:
        user: user id, used to look up the user's followed authors.
        articles: full ordered list of article ids the user has read.
        ages: per-article example ages, aligned with `articles`.
        mask_rate: (train only) probability of masking an input article.
        mask_mask_rate: of the masked positions, fraction replaced with
            TOKEN_UNK; the rest get a uniformly random article instead.
        random_sample_length: if True, shrink the window to a random
            length in [minimum_len, current length].
        random_range: if True, place the window at a random offset;
            otherwise use the most recent articles.
        minimum_len: lower bound for the random window length.
        data_type: 'train', 'valid' or 'test'; controls how many trailing
            items are held out for the prediction target.
        positive: (train/valid) if True the target is the true next
            article; if False a random unread article is sampled.

    Returns:
        Tuple (article_sequence, magazine_sequence, author_sequence,
        feature_sequence, target_age, target); every sequence is
        left-padded to MAX_USER_SEQUENCE_LEN.
    """
    metadata = load_data('metadata.json')
    metadata_articles = load_data('metadata_articles')
    article_to_id = load_data('article_to_id')
    magazine_to_id = load_data('magazine_to_id')
    following_list = load_data('following_list')
    author_to_id = load_data('author_to_id')

    # Hold out the trailing item(s) as prediction targets for train/valid.
    if data_type == 'train':
        orig_len = len(articles) - 2
    elif data_type == 'valid':
        orig_len = len(articles) - 1
    else:
        orig_len = len(articles)
    cur_len = min(MAX_USER_SEQUENCE_LEN, orig_len)

    if random_sample_length and cur_len > minimum_len:
        cur_len = random.randrange(minimum_len, cur_len + 1)
    rem_len = MAX_USER_SEQUENCE_LEN - cur_len

    if random_range:
        bidx = random.randrange(0, orig_len - cur_len + 1)
        eidx = bidx + cur_len
    else:
        bidx = orig_len - cur_len
        eidx = bidx + cur_len

    item_size = len(metadata)

    article_input = []
    magazine_input = []
    feature_input = []
    author_input = []
    for article, example_age in zip(articles[bidx:eidx], ages[bidx:eidx]):
        # BERT-style input masking (train only): with prob. mask_rate the
        # article is replaced by UNK or by a random article.
        if data_type == 'train' and np.random.random() < mask_rate:
            r = np.random.random()
            if r < mask_mask_rate:
                current = TOKEN_UNK
            else:
                current = metadata_articles[random.randrange(0, item_size)]
        else:
            current = article
        article_input.append(article_to_id.get(current, VALUE_UNK))

        if current in metadata:
            magazine_input.append(
                magazine_to_id.get(metadata[current]['magazine_id'],
                                   VALUE_UNK))
            article_age = metadata[current]['age']
        else:
            magazine_input.append(VALUE_UNK)
            article_age = 0
        # BUG FIX: author_to_id is keyed by author names (its cache builder
        # inserts get_author(article)), but the original looked up the raw
        # article id `current`, so the author feature always resolved to
        # VALUE_UNK. Map the article to its author first.
        author_input.append(author_to_id.get(get_author(current), VALUE_UNK))
        subscript = 1.0 if get_author(current) in following_list[user] else 0.0
        current_feature = [
            subscript,
            example_age,
            math.pow(example_age, 2),
            math.sqrt(example_age),
            article_age,
            math.pow(article_age, 2),
            math.sqrt(article_age),
        ]
        feature_input.append(current_feature)

    if data_type == 'test':
        target = None
        target_age = 1.001
    else:
        target_age = ages[eidx]
        if positive:
            target = articles[eidx]
        else:
            # Negative sampling: draw a random article the user has not read.
            user_seen = set(articles)
            while True:
                target = metadata_articles[random.randrange(0, item_size)]
                if target in user_seen:
                    continue
                break

    # Left-pad every sequence to the fixed model input length.
    article_sequence = [VALUE_PAD] * rem_len + article_input
    magazine_sequence = [VALUE_PAD] * rem_len + magazine_input
    author_sequence = [VALUE_PAD] * rem_len + author_input
    feature_sequence = [[0, 0, 0, 0, 0, 0, 0]] * rem_len + feature_input
    return article_sequence, magazine_sequence, author_sequence, feature_sequence, target_age, target