예제 #1
0
def train():
    """Build click / no-click training samples and fit the CNN model.

    For each ``(user_id, clicked_news_ids)`` pair in ``mp_train_hist``:

    * If the history is longer than ``num_used_hists``, the first
      ``num_used_hists`` clicked articles form the user representation and
      every remaining clicked article becomes one positive sample.
    * Otherwise all clicked articles except the last form the user
      representation (zero-padded up to ``num_used_hists`` entries) and the
      last clicked article becomes the single positive sample.

    Every positive sample (label 1) is followed by ``num_train_negatives``
    negative samples (label 0) whose article is chosen by
    ``get_negative_news_id(train_news_ids, user_id)``.

    Users with an empty click history are skipped, since there is no article
    to use as a positive sample.

    The collected inputs get an extra length-1 axis inserted at position 1
    before being handed to ``model.fit_model``.
    """
    model = CNN(num_used_hists, num_words, num_word_embedding_dims)
    model.create_model()
    model.get_model_summary()

    print('loading doc embedding...')
    mp_doc_embedding = my_utils.read_pkl(
        config['DEFAULT']['path_all_news_doc_embedding'])

    print('constructing input data...')
    train_news_ids = get_train_news_ids()
    user_in = []
    article_in = []
    truth = []

    def add_samples(user_id, user_embedding, positive_news_id):
        # Emit one positive sample followed by its negative samples; all of
        # them share the same user representation.
        article_in.append(mp_doc_embedding[positive_news_id])
        user_in.append(user_embedding)
        truth.append(1)
        for _ in range(num_train_negatives):
            article_in.append(mp_doc_embedding[get_negative_news_id(
                train_news_ids, user_id)])
            user_in.append(user_embedding)
            truth.append(0)

    # One shared zero matrix is enough for padding: np.array() below copies
    # the data, so the padded slots never alias each other in the result.
    padding = np.zeros((num_words, num_word_embedding_dims))

    for user_id, clicked_news_ids in tqdm(mp_train_hist.items()):
        if len(clicked_news_ids) == 0:
            # Nothing to predict for this user; indexing [-1] would raise.
            continue

        if len(clicked_news_ids) > num_used_hists:
            user_embedding = [
                mp_doc_embedding[news_id]
                for news_id in clicked_news_ids[:num_used_hists]
            ]
            for news_id in clicked_news_ids[num_used_hists:]:
                add_samples(user_id, user_embedding, news_id)
        else:
            # Hold out the last click as the target, pad the rest.
            user_embedding = [
                mp_doc_embedding[news_id]
                for news_id in clicked_news_ids[:-1]
            ]
            num_paddings = num_used_hists - len(user_embedding)
            user_embedding.extend([padding] * num_paddings)
            add_samples(user_id, user_embedding, clicked_news_ids[-1])

    print('reshaping input data...')
    # Insert a length-1 axis after the sample axis. expand_dims yields the
    # same shape as the former np.resize call without flattening/copying and
    # without np.resize's silent repeat/truncate behaviour on size mismatch.
    user_in = np.expand_dims(np.array(user_in), axis=1)
    article_in = np.expand_dims(np.array(article_in), axis=1)

    print('start training...')
    model.fit_model([user_in, article_in], np.array(truth), sample_batch_size,
                    num_epochs)