def train():
    """Build and fit the CNN click-prediction model from users' click histories.

    For each user in ``mp_train_hist`` a user profile is built from the
    embeddings of up to ``num_used_hists`` clicked articles (zero-padded when
    the history is short).  Each remaining click becomes a positive example,
    paired with ``num_train_negatives`` sampled negative articles.

    Reads (module-level): ``mp_train_hist``, ``num_used_hists``, ``num_words``,
    ``num_word_embedding_dims``, ``num_train_negatives``, ``sample_batch_size``,
    ``num_epochs``, ``config`` and helpers ``get_train_news_ids`` /
    ``get_negative_news_id``.  No return value; trains the model in place.
    """
    model = CNN(num_used_hists, num_words, num_word_embedding_dims)
    model.create_model()
    model.get_model_summary()

    print('loading doc embedding...')
    # news_id -> document embedding matrix, presumably (num_words, num_word_embedding_dims)
    mp_doc_embedding = my_utils.read_pkl(
        config['DEFAULT']['path_all_news_doc_embedding'])

    print('constructing input data...')
    train_news_ids = get_train_news_ids()
    user_in = []     # one user-profile (list of article embeddings) per sample
    article_in = []  # one candidate-article embedding per sample
    truth = []       # 1 = clicked (positive), 0 = sampled negative
    for user_id, clicked_news_ids in tqdm(mp_train_hist.items()):
        user_embedding = []
        if len(clicked_news_ids) > num_used_hists:
            # Long history: the first num_used_hists clicks form the profile;
            # every later click is a positive training article.
            for news_id in clicked_news_ids[:num_used_hists]:
                user_embedding.append(mp_doc_embedding[news_id])
            # NOTE(review): original indentation was lost; negatives are
            # reconstructed as sampled per positive click (matching the
            # 1-positive/num_train_negatives ratio of the else-branch) — confirm.
            for news_id in clicked_news_ids[num_used_hists:]:
                article_in.append(mp_doc_embedding[news_id])
                user_in.append(user_embedding)
                truth.append(1)
                for _ in range(num_train_negatives):
                    article_in.append(mp_doc_embedding[get_negative_news_id(
                        train_news_ids, user_id)])
                    user_in.append(user_embedding)
                    truth.append(0)
        else:
            # Short history: all clicks but the last form the profile
            # (zero-padded up to num_used_hists); the last click is the
            # single positive article.
            for news_id in clicked_news_ids[:-1]:
                user_embedding.append(mp_doc_embedding[news_id])
            num_paddings = num_used_hists - len(user_embedding)
            for _ in range(num_paddings):
                user_embedding.append(
                    np.zeros((num_words, num_word_embedding_dims)))
            article_in.append(mp_doc_embedding[clicked_news_ids[-1]])
            user_in.append(user_embedding)
            truth.append(1)
            for _ in range(num_train_negatives):
                article_in.append(mp_doc_embedding[get_negative_news_id(
                    train_news_ids, user_id)])
                user_in.append(user_embedding)
                truth.append(0)

    print('reshaping input data...')
    user_in = np.array(user_in)
    article_in = np.array(article_in)
    # Insert a channel axis of size 1.  The original code used np.resize,
    # which only worked by coincidence (same total element count); reshape
    # expresses the intent and raises if the shapes ever disagree instead of
    # silently repeating/truncating data.
    user_in = user_in.reshape((user_in.shape[0], 1) + user_in.shape[1:])
    article_in = article_in.reshape(
        (article_in.shape[0], 1) + article_in.shape[1:])

    print('start training...')
    model.fit_model([user_in, article_in], np.array(truth),
                    sample_batch_size, num_epochs)