def test(self, word_ids, sentiment):
    """Evaluate the sentiment model on a held-out set.

    :param word_ids: integer array of token ids (cast to int64 here).
    :param sentiment: labels, cast to float32 before being fed to the loss.
    :return: (mean minibatch loss, mean of the per-minibatch mismatch ratio).
    """
    self._net.train(False)  # switch to eval mode; fit() flips it back after calling test()
    word_ids = word_ids.astype(np.int64)
    word_ids_tensor = gpu(torch.from_numpy(word_ids), self._use_cuda)
    sent_tensor = gpu(
        torch.from_numpy(np.asarray(sentiment).astype(np.float32)),
        self._use_cuda)
    epoch_loss = 0.0
    epoch_acc = 0.0
    for (minibatch_num, (batch_word, batch_sent)) in enumerate(
            minibatch_sentences(self._batch_size, word_ids_tensor,
                                sent_tensor)):
        word_var = Variable(batch_word)
        sent_var = Variable(batch_sent.squeeze(), requires_grad=False)
        predictions = self._net(word_var)
        preds = accuracy_one(predictions.data)
        loss = self._loss(predictions, sent_var)
        # .data[0] is the legacy (pre-0.4 PyTorch) scalar extraction
        epoch_loss = epoch_loss + loss.data[0]
        # NOTE(review): `preds != labels` counts MISmatches, so despite the
        # name this accumulates an error rate, not an accuracy — confirm intent.
        epoch_acc += torch.sum(preds != sent_var.data.byte()) / len(
            sent_var.data)
    # averages over minibatches, not over samples (last batch may be smaller)
    epoch_loss = epoch_loss / (minibatch_num + 1)
    epoch_acc = epoch_acc / (minibatch_num + 1)
    return epoch_loss, epoch_acc
def _select_user_user_sppmi_input(self, batch_user, done, user_user_sppmi):
    """Build the (user_indices, sppmi_rows) input for the user-user SPPMI loss.

    Parameters
    ----------
    batch_user: :class:`torch.Tensor`
        shape (batch_size, ); user ids in the current minibatch.
    done: :class:`set`
        ids of users already processed; updated in place so each user
        contributes its SPPMI row at most once.
    user_user_sppmi: 2-D array-like indexable by user id
        row ``u`` holds the SPPMI values of user ``u`` against all users.

    Returns
    -------
    list
        ``[user_indices, sppmi_rows]`` as (optionally gpu-placed) tensors,
        or ``[]`` when every user in the batch was seen before.
    """
    targets = []
    for u in torch_utils.tensor2numpy(batch_user):
        if u in done:
            continue
        done.add(u)
        targets.append(u)
    if not targets:
        return []
    targets = np.array(targets)
    selected = user_user_sppmi[targets]
    # `targets` is already an ndarray; no need to re-wrap it (old code did
    # np.array(targets) a second time)
    user_indices = torch_utils.numpy2tensor(targets, dtype=torch.long)
    selected = torch_utils.numpy2tensor(np.asarray(selected), dtype=torch.float)
    # use the keyword form of gpu() consistently for both tensors
    sppmi = [torch_utils.gpu(user_indices, gpu=self._use_cuda),
             torch_utils.gpu(selected, gpu=self._use_cuda)]
    return sppmi
def load_best_model_test2_test3(self, test2: interactions.MatchInteraction,
                                test3: interactions.MatchInteraction, topN: int):
    """Reload the best saved checkpoint and report ranking metrics on test2.

    Returns (hits@topN, ndcg@topN) on test2; test3 is accepted but unused here.
    """
    model = self._net
    model.load_state_dict(torch.load(self.saved_model))
    model.train(False)  # evaluation mode
    my_utils.gpu(model, self._use_cuda)
    assert len(test2.unique_queries_test) in KeyWordSettings.QueryCountTest
    metrics, ranking_dump = self.evaluate(test2, topN, output_ranking=True)
    hits_test2 = metrics["hits"]
    ndcg_test2 = metrics["ndcg"]
    ndcg_at_1_test2 = metrics["ndcg@1"]
    FileHandler.save_error_analysis_test2(
        json.dumps(ranking_dump, sort_keys=True, indent=2))
    FileHandler.myprint(
        'Best Test2_hard hits@%d = %.5f | Best Test2_hard ndcg@%d = %.5f '
        '|Best Test2_hard ndcg@1 = %.5f '
        % (topN, hits_test2, topN, ndcg_test2, ndcg_at_1_test2))
    return hits_test2, ndcg_test2
def load_best_model_single(
        self, target_interactions: interactions.MatchInteraction, topN: int):
    """Reload the best checkpoint and evaluate a single interactions set.

    Note: This function is used for Heat map visualization only.
    """
    model = self._net
    model.load_state_dict(torch.load(self.saved_model))
    model.train(False)  # evaluation mode
    my_utils.gpu(model, self._use_cuda)
    metrics, ranking_dump = self.evaluate(target_interactions, topN,
                                          output_ranking=True)
    hits, ndcg, ndcg_at_1 = metrics["hits"], metrics["ndcg"], metrics["ndcg@1"]
    FileHandler.save_error_analysis_validation(
        json.dumps(ranking_dump, sort_keys=True, indent=2))
    FileHandler.myprint(
        'Best Target hits@%d = %.5f | Best Target ndcg@%d = %.5f '
        '|Best Target ndcg@1 = %.5f' % (topN, hits, topN, ndcg, ndcg_at_1))
    return hits, ndcg
def _prepare_network_input(self, batch_user, visited_users, adjNetwork):
    """Build (user_indices, adjacency_labels) tensors for the network loss.

    Parameters
    ----------
    batch_user: :class:`torch.Tensor`
        shape (batch_size, ); user ids in the current minibatch.
    visited_users: :class:`set`
        ids of users already processed; updated in place so each user
        contributes its adjacency row at most once.
    adjNetwork: :class:`dict`
        key: userID `int`; value: adjacency row over all users.

    Returns
    -------
    list
        ``[user_indices, labels]`` tensors, placed on GPU when enabled.
    """
    targets, labels = [], []
    for u in torch_utils.tensor2numpy(batch_user):
        if u in visited_users:
            continue
        visited_users.add(u)
        # users missing from the network get an all-zero adjacency row
        neighbors = adjNetwork.get(u, [0] * self._n_users)
        targets.append(u)
        labels.append(neighbors)
    user_indices = torch_utils.numpy2tensor(np.array(targets), dtype=torch.long)
    labels = torch_utils.numpy2tensor(np.array(labels), dtype=torch.float)
    # BUGFIX: gpu placement was hard-coded to True, which breaks CPU-only
    # runs; honor the fitter's _use_cuda flag like the sibling methods do.
    network = [torch_utils.gpu(user_indices, gpu=self._use_cuda),
               torch_utils.gpu(labels, gpu=self._use_cuda)]
    return network
def predict(self, user_ids, item_ids):
    """Score (user, item) pairs with the trained network.

    :param user_ids: array of user ids (cast to int64 here).
    :param item_ids: array of item ids (cast to int64 here).
    :return: numpy array of predicted scores.
    """
    self._net.train(False)  # evaluation mode
    user_ids = user_ids.astype(np.int64)
    item_ids = item_ids.astype(np.int64)
    user_ids_tensor = gpu(torch.from_numpy(user_ids), self._use_cuda)
    item_ids_tensor = gpu(torch.from_numpy(item_ids), self._use_cuda)
    user_var = Variable(user_ids_tensor)
    item_var = Variable(item_ids_tensor)
    # BUGFIX: move the result back to host before .numpy(); calling
    # .numpy() directly on a CUDA tensor raises when _use_cuda is True.
    return self._net(user_var, item_var).data.cpu().numpy()
def fit(self, word_ids, sentiment, word_ids_test, sentiment_test, verbose=True):
    """Train the sentiment classifier for ``self._n_iter`` epochs.

    :param word_ids: training token-id array (cast to int64 here).
    :param sentiment: training labels (cast to float32 before use).
    :param word_ids_test: held-out token ids used for per-epoch validation.
    :param sentiment_test: held-out labels.
    :param verbose: when True, run self.test() and print metrics each epoch.
    """
    word_ids = word_ids.astype(np.int64)
    word_ids_test = word_ids_test.astype(np.int64)
    if not self._initialized:
        self._initialize()
    self._net.train(True)  # training mode
    for epoch_num in range(self._n_iter):
        # reshuffle the training data every epoch
        words, sents = shuffle_sentences(
            word_ids, np.asarray(sentiment).astype(np.float32))
        word_ids_tensor = gpu(torch.from_numpy(words), self._use_cuda)
        sent_tensor = gpu(torch.from_numpy(sents), self._use_cuda)
        epoch_loss = 0.0
        epoch_acc = 0.0
        for (minibatch_num, (batch_word, batch_sent)) in enumerate(
                minibatch_sentences(self._batch_size, word_ids_tensor,
                                    sent_tensor)):
            word_var = Variable(batch_word)
            sent_var = Variable(batch_sent.squeeze(), requires_grad=False)
            predictions = self._net(word_var)
            preds = accuracy_one(predictions.data)
            self._optimizer.zero_grad()
            loss = self._loss(predictions, sent_var)
            # .data[0] is the legacy (pre-0.4 PyTorch) scalar extraction
            epoch_loss = epoch_loss + loss.data[0]
            # NOTE(review): `preds != labels` counts MISmatches, so despite
            # the name this accumulates an error rate — confirm intent.
            epoch_acc += torch.sum(preds != sent_var.data.byte()) / len(
                sent_var.data)
            loss.backward()
            self._optimizer.step()
        # averages over minibatches, not samples (last batch may be smaller)
        epoch_loss = epoch_loss / (minibatch_num + 1)
        epoch_acc = epoch_acc / (minibatch_num + 1)
        if verbose:
            val_loss, val_acc = self.test(word_ids_test, sentiment_test)
            print('Epoch {}: train loss {}'.format(epoch_num, epoch_loss),
                  'train acc', epoch_acc, 'validation loss', val_loss,
                  'validation acc', val_acc)
            # test() switched the net to eval mode; restore training mode
            self._net.train(True)
def fit(self, user_ids, item_ids, ratings, user_ids_test, item_ids_test,
        ratings_test, verbose=True):
    """Train the rating-prediction network for ``self._n_iter`` epochs.

    :param user_ids, item_ids, ratings: training triples (ids cast to int64).
    :param user_ids_test, item_ids_test, ratings_test: held-out triples
        evaluated once per epoch when ``verbose`` is True.
    :raises ValueError: when an epoch ends with NaN or exactly-zero loss.
    """
    user_ids = user_ids.astype(np.int64)
    item_ids = item_ids.astype(np.int64)
    user_ids_test = user_ids_test.astype(np.int64)
    item_ids_test = item_ids_test.astype(np.int64)
    if not self._initialized:
        self._initialize()
    for epoch_num in range(self._n_iter):
        # reshuffle the training triples every epoch
        users, items, ratingss = shuffle(user_ids, item_ids, ratings)
        user_ids_tensor = gpu(torch.from_numpy(users), self._use_cuda)
        item_ids_tensor = gpu(torch.from_numpy(items), self._use_cuda)
        ratings_tensor = gpu(torch.from_numpy(ratingss), self._use_cuda)
        epoch_loss = 0.0
        for (minibatch_num,
             (batch_user, batch_item, batch_ratings)) in enumerate(
                 minibatch(self._batch_size, user_ids_tensor,
                           item_ids_tensor, ratings_tensor)):
            user_var = Variable(batch_user)
            item_var = Variable(batch_item)
            ratings_var = Variable(batch_ratings)
            predictions = self._net(user_var, item_var)
            self._optimizer.zero_grad()
            loss = self._loss_func(ratings_var, predictions)
            # .data[0] is the legacy (pre-0.4 PyTorch) scalar extraction
            epoch_loss = epoch_loss + loss.data[0]
            loss.backward()
            self._optimizer.step()
        # average over minibatches, not samples (last batch may be smaller)
        epoch_loss = epoch_loss / (minibatch_num + 1)
        if verbose:
            val_loss = self.test(user_ids_test, item_ids_test, ratings_test)
            print('Epoch {}: train loss {}'.format(epoch_num, epoch_loss),
                  'validation loss', val_loss)
            # test() switched the net to eval mode; restore training mode
            self._net.train(True)
        # guard against a collapsed/diverged model
        if np.isnan(epoch_loss) or epoch_loss == 0.0:
            raise ValueError(
                'Degenerate epoch loss: {}'.format(epoch_loss))
def _get_loss(self, query_ids: torch.Tensor, query_contents: torch.Tensor,
              doc_ids: torch.Tensor, doc_contents: torch.Tensor,
              query_lens: np.ndarray, docs_lens: np.ndarray,
              target_contents, **kargs) -> torch.Tensor:
    """Compute token-level cross-entropy loss for batch_size query/doc pairs.

    Note: Query and Doc have different lengths
    :param query_ids: (B, )
    :param query_contents: (B, L)
    :param doc_ids: (B, )
    :param doc_contents: (B, R)
    :param query_lens: (B, )
    :param docs_lens: (B, )
    :param target_contents: (B, R) target token ids
    :param kargs: unused extra keyword arguments
    :return: scalar loss tensor; padding positions are ignored
    """
    # (removed unused locals batch_size/L1/L2 from the original)
    # sort/restore index pairs let downstream RNNs pack padded sequences
    q_new_indices, q_restoring_indices = torch_utils.get_sorted_index_and_reverse_index(
        query_lens)
    query_lens = my_utils.gpu(torch.from_numpy(query_lens), self._use_cuda)
    d_new_indices, d_old_indices = torch_utils.get_sorted_index_and_reverse_index(
        docs_lens)
    docs_lens = my_utils.gpu(torch.from_numpy(docs_lens), self._use_cuda)
    additional_params = {  # fixed typo: was "additional_paramters"
        KeyWordSettings.Query_lens: query_lens,
        KeyWordSettings.QueryLensIndices:
        (q_new_indices, q_restoring_indices, query_lens),
        KeyWordSettings.Doc_lens: docs_lens,
        KeyWordSettings.DocLensIndices: (d_new_indices, d_old_indices, docs_lens),
        KeyWordSettings.UseCuda: self._use_cuda
    }
    logits = self._net(query_contents, doc_contents, query_lens, None,
                       **additional_params)
    num_classes = len(self._vocab._state['term_index'])
    logits = logits.view(-1, num_classes)  # (B * R, C)
    target_contents = target_contents.view(-1)  # (B, R) => (B * R)
    loss = F.cross_entropy(logits, target_contents,
                           ignore_index=self.index_of_pad_token)
    return loss
def _initialize(self, interactions):
    """Build the network, optimizer and loss from an interactions dataset.

    Parameters
    ----------
    interactions: :class:`interactions.Interactions`
        provides num_users / num_items used to size the embedding tables.
    """
    self._n_users, self._n_items = interactions.num_users, interactions.num_items
    if self._net_type == "gau":
        self._net = my_nets.GAU(self._n_users, self._n_items,
                                self._embedding_dim)
    else:
        # BUGFIX: previously an unknown net_type fell through with _net
        # unset and failed later with an obscure error; fail fast instead.
        raise ValueError("Unsupported net_type: %s" % self._net_type)
    # put the model into cuda if use cuda
    self._net = my_utils.gpu(self._net, self._use_cuda)
    if self._optimizer_func is None:
        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._reg_l2,
                                     lr=self._learning_rate)
    else:
        self._optimizer = self._optimizer_func(self._net.parameters())
    # losses functions
    self._loss_func = my_losses.single_pointwise_square_loss
    print("Using: ", self._loss_func)
def test(self, user_ids, item_ids, ratings):
    """Return the loss of the current network on a held-out ratings set."""
    self._net.train(False)  # evaluation mode
    user_var = Variable(
        gpu(torch.from_numpy(user_ids.astype(np.int64)), self._use_cuda))
    item_var = Variable(
        gpu(torch.from_numpy(item_ids.astype(np.int64)), self._use_cuda))
    ratings_var = Variable(gpu(torch.from_numpy(ratings), self._use_cuda))
    predictions = self._net(user_var, item_var)
    # legacy (pre-0.4 PyTorch) scalar extraction
    return self._loss_func(ratings_var, predictions).data[0]
def _initialize(self):
    """Lazily build the network, optimizer and loss before training.

    BUGFIX: the optimizer used to be created only inside the ``_net is
    None`` branch, so a network injected through ``__init__`` left
    ``self._optimizer`` as None and training crashed. The optimizer is now
    created whenever it is still missing, regardless of where the net came
    from.
    """
    if self._net is None:
        self._net = gpu(
            FirstModel(self._embedding_dim, self._vocab_size, self._seq_len),
            self._use_cuda)
    if self._optimizer is None:
        self._optimizer = optim.Adam(self._net.parameters(),
                                     lr=self._learning_rate,
                                     weight_decay=0)
    if self._loss is None:
        self._loss = torch.nn.BCELoss()
def _get_negative_prediction(self, user_ids):
    """Score randomly sampled negative items for the given users.

    Code from Spotlight.
    """
    sampled_items = self._sampler.random_sample_items(
        self._n_items, len(user_ids), random_state=self._random_state)
    negative_var = my_utils.gpu(torch.from_numpy(sampled_items),
                                self._use_cuda)
    return self._net(user_ids, negative_var)
def evaluate(self, testRatings: interactions.MatchInteraction, output_ranking=False, **kargs): self._net.train(False) # disabling training query_ids, left_contents, left_lengths, \ doc_ids, right_contents, target_contents, right_lengths = self._sampler.get_instances(testRatings) eval_loss = 0.0 total_tokens = 0 for (minibatch_num, (batch_query, batch_query_content, batch_query_len, batch_doc, batch_doc_content, batch_doc_target, batch_docs_lens)) \ in enumerate(my_utils.minibatch(query_ids, left_contents, left_lengths, doc_ids, right_contents, target_contents, right_lengths, batch_size=self._batch_size)): batch_query = my_utils.gpu(torch.from_numpy(batch_query), self._use_cuda) batch_query_content = my_utils.gpu( torch.from_numpy(batch_query_content), self._use_cuda) # batch_query_len = my_utils.gpu(torch.from_numpy(batch_query_len), self._use_cuda) batch_doc = my_utils.gpu(torch.from_numpy(batch_doc), self._use_cuda) batch_doc_content = my_utils.gpu( torch.from_numpy(batch_doc_content), self._use_cuda) batch_doc_target = my_utils.gpu(torch.from_numpy(batch_doc_target), self._use_cuda) # batch_docs_lens = my_utils.gpu(torch.from_numpy(batch_docs_lens), self._use_cuda) batch_loss = self._get_loss(batch_query, batch_query_content, batch_doc, batch_doc_content, batch_query_len, batch_docs_lens, batch_doc_target) mask = (batch_doc_target != self.index_of_pad_token) non_pad_tokens = torch.sum(mask).float() loss = batch_loss.data.cpu().numpy() loss *= non_pad_tokens eval_loss += loss total_tokens += non_pad_tokens eval_loss /= total_tokens results = dict() results["cross_entropy"] = eval_loss return results
def load_best_model(self, val_interactions: interactions.MatchInteraction,
                    test_interactions: interactions.MatchInteraction):
    """Reload the best checkpoint and report val/test cross-entropy."""
    model = self._net
    model.load_state_dict(torch.load(self.saved_model))
    model.train(False)  # evaluation mode
    my_utils.gpu(model, self._use_cuda)
    val_results = self.evaluate(val_interactions)
    test_results = self.evaluate(test_interactions)
    val_loss = val_results["cross_entropy"]
    test_loss = test_results["cross_entropy"]
    FileHandler.save_error_analysis_validation(
        json.dumps(val_results, sort_keys=True, indent=2))
    FileHandler.save_error_analysis_testing(
        json.dumps(test_results, sort_keys=True, indent=2))
    FileHandler.myprint('Best val loss = %.5f |Best Test loss = %.5f ' %
                        (val_loss, test_loss))
    return val_loss, test_loss
def _get_negative_prediction(self, user_ids):
    """Predict scores for randomly sampled negative items."""
    sampled_items = sample_items(self._num_items, len(user_ids),
                                 random_state=self._random_state)
    negative_var = Variable(
        gpu(torch.from_numpy(sampled_items), self._use_cuda))
    return self._net(user_ids, negative_var)
def _initialize(self):
    """Lazily build the network, optimizer and loss before training.

    BUGFIX: the optimizer used to be created only inside the ``_net is
    None`` branch, so a pre-existing network left ``self._optimizer``
    unset. It is now created whenever it is still missing.
    """
    if self._net is None:
        self._net = gpu(
            DotModel(self._num_users, self._num_items, self._embedding_dim),
            self._use_cuda)
    # getattr guard: the attribute may not exist when the net was injected
    if getattr(self, "_optimizer", None) is None:
        self._optimizer = optim.Adam(self._net.parameters(),
                                     lr=self._learning_rate,
                                     weight_decay=self._l2)
    if self._loss_func is None:
        self._loss_func = regression_loss
def load_best_model(self, val_interactions: interactions.MatchInteraction,
                    test_interactions: interactions.MatchInteraction,
                    topN: int):
    """Reload the best checkpoint; report ranking metrics on val and test."""
    model = self._net
    model.load_state_dict(torch.load(self.saved_model))
    model.train(False)  # evaluation mode
    my_utils.gpu(model, self._use_cuda)
    # sanity-check the query counts before each evaluation
    assert len(val_interactions.unique_queries_test) in KeyWordSettings.QueryCountVal
    result_val, error_analysis_val = self.evaluate(val_interactions, topN,
                                                   output_ranking=True)
    hits = result_val["hits"]
    ndcg = result_val["ndcg"]
    ndcg_at_1 = result_val["ndcg@1"]
    assert len(test_interactions.unique_queries_test) in KeyWordSettings.QueryCountTest
    result_test, error_analysis_test = self.evaluate(test_interactions, topN,
                                                     output_ranking=True)
    hits_test = result_test["hits"]
    ndcg_test = result_test["ndcg"]
    ndcg_at_1_test = result_test["ndcg@1"]
    FileHandler.save_error_analysis_validation(
        json.dumps(error_analysis_val, sort_keys=True, indent=2))
    FileHandler.save_error_analysis_testing(
        json.dumps(error_analysis_test, sort_keys=True, indent=2))
    FileHandler.myprint(
        'Best Vad hits@%d = %.5f | Best Vad ndcg@%d = %.5f '
        '|Best Test hits@%d = %.5f |Best Test ndcg@%d = %.5f'
        '|Best Vad ndcg@1 = %.5f |Best Test ndcg@1 = %.5f'
        % (topN, hits, topN, ndcg, topN, hits_test, topN, ndcg_test,
           ndcg_at_1, ndcg_at_1_test))
    return hits, ndcg, hits_test, ndcg_test
def decoder(self, decoder_inputs: torch.Tensor, init_states: torch.Tensor,
            encoder_outputs: torch.Tensor, encoder_decoder_veracities,
            **kargs):
    """Decoder with attention over pad_packed_sequence outputted from encoder.

    We need to loop step by step in the decoder. Usually, we can input the
    whole sequence into the decoder and let it run quickly. However, we are
    now using an attention mechanism to derive the context of each time-step
    in the decoder, so we loop step-by-step over the decoder input sequence.

    :param decoder_inputs: shape (batch_size, seq_length)
    :param init_states: the last hidden states outputted from the Encoder
    :param encoder_outputs: pad_packed_sequence tensor with size
        (batch_size, seq_len, hidden_size)
    :param encoder_decoder_veracities: extra signal forwarded unchanged to
        ``self.decoder_att`` at every step
    :param kargs: must contain ``KeyWordSettings.UseCuda``
    :return: (logits over the vocabulary, final decoder hidden state)
    """
    batch_size = decoder_inputs.size(0)
    max_target_length = decoder_inputs.size(1)  # seq_length of d-tweets
    all_decoder_outputs = Variable(
        torch.zeros(max_target_length, batch_size,
                    self.decoder_att.output_size))
    use_cuda = kargs[KeyWordSettings.UseCuda]
    all_decoder_outputs = torch_utils.gpu(all_decoder_outputs, use_cuda)
    decoder_hidden = init_states
    # "input feed": previous step's attentional output, zero at step 0
    input_feed_init = torch.zeros(batch_size, self.decoder_att.hidden_size)
    input_feed_init = torch_utils.gpu(input_feed_init, use_cuda)
    # we start from 0
    for t in range(max_target_length):
        tic = time.time()  # tic/toc kept from the original; values unused
        decoder_input = decoder_inputs[:, t]  # Next input is current target
        # we need to change hidden_state after each step.
        decoder_output, decoder_hidden, decoder_attn, input_feed_init = self.decoder_att(
            decoder_input, decoder_hidden, encoder_outputs,
            encoder_decoder_veracities, input_feed_init)
        toc = time.time()
        all_decoder_outputs[t] = decoder_output  # shape (batch_size, self.decoder_att.output_size)
    # time-major -> batch-major
    all_decoder_outputs = all_decoder_outputs.permute(1, 0, 2)  # (B, max_target_length, decoder_output_size)
    logits = self.outputs2vocab(all_decoder_outputs)
    return logits, decoder_hidden
def _initialize(self, interactions: interactions.MatchInteraction):
    """Place the net on GPU (if requested) and set up optimizer and loss.

    Parameters
    ----------
    interactions: :class:`interactions.MatchInteraction`
    """
    # put the model into cuda if use cuda
    self._net = my_utils.gpu(self._net, self._use_cuda)
    if self._optimizer_func is None:
        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._reg_l2,
                                     lr=self._learning_rate)
    else:
        self._optimizer = self._optimizer_func(self._net.parameters())
    # losses functions: name -> implementation; unknown names fall back to
    # adaptive hinge, matching the original if/elif chain's final else.
    loss_table = {
        'pointwise': my_losses.pointwise_loss,
        "single_pointwise_square_loss": my_losses.single_pointwise_square_loss,
        'bpr': my_losses.bpr_loss,
        'hinge': my_losses.hinge_loss,
        'bce': my_losses.pointwise_bceloss,  # binary cross entropy
        "pce": my_losses.positive_cross_entropy,
        "cosine_max_margin_loss_dvsh": my_losses.cosine_max_margin_loss_dvsh,
        "cross_entropy": my_losses.binary_cross_entropy_cls,
        "masked_cross_entropy": my_losses.masked_binary_cross_entropy,
        "vanilla_cross_entropy": my_losses.vanilla_cross_entropy,
        "regression_loss": my_losses.regression_loss,
    }
    self._loss_func = loss_table.get(self._loss, my_losses.adaptive_hinge_loss)
    FileHandler.myprint("Using: " + str(self._loss_func))
def __init__(self, embedding_dim=30, n_iter=2, batch_size=64,
             learning_rate=1e-3, net=None, loss=None, use_cuda=False,
             vocab_size=1, seq_len=1):
    """Store hyper-parameters; the net/optimizer are built lazily later.

    :param embedding_dim: word embedding dimensionality.
    :param n_iter: number of training epochs.
    :param batch_size: minibatch size.
    :param learning_rate: Adam learning rate.
    :param net: optional pre-built network (placed on GPU when use_cuda).
    :param loss: optional loss module; defaults to BCELoss at init time.
    :param use_cuda: move tensors/model to GPU when True.
    :param vocab_size: vocabulary size used when the net is built lazily.
    :param seq_len: input sequence length used when the net is built lazily.
    """
    self._embedding_dim = embedding_dim
    self._n_iter = n_iter
    self._batch_size = batch_size
    self._learning_rate = learning_rate
    self._use_cuda = use_cuda
    # idiom fix: compare against None with `is not None`, not `!=`
    self._net = gpu(net, use_cuda) if net is not None else None
    self._loss = loss
    self._optimizer = None
    self._vocab_size = vocab_size
    self._seq_len = seq_len
def forward(self, query: torch.Tensor, document: torch.Tensor, verbose=False,
            **kargs):
    """Forward pass over integer query and document token-id tensors.

    Builds n-gram similarity tensors between lexical and contextual (ELMo)
    representations, optionally appends a visual-matching score, and maps
    the concatenated features to a final matching score.

    :param query: (B, L) integer token ids; 0 is treated as padding.
    :param document: (B, R) integer token ids; 0 is treated as padding.
    :param verbose: print the raw output scores when True.
    :param kargs: must contain QueryIDs, DocIDs and UseCuda; when
        ``self.use_visual`` also QueryImagesIndices / DocImagesIndices.
    :return: matching scores; when OutputRankingKey is set and the visual
        path is on, visual score details are concatenated for error analysis.
    """
    max_left_len, max_right_len = query.size(1), document.size(1)
    # Process left & right input.
    # https://github.com/AdeDZY/K-NRM/blob/master/knrm/model/model_base.py#L96
    tensor_mask = torch_utils.create_mask_tensor(query, document, threshold=1)
    doc_mask = (document > 0).float()
    query_mask = (query > 0).float()  # B, L
    embed_query = self.src_word_emb(query.long())  # (B, L, D)
    embed_doc = self.src_word_emb(document.long())  # (B, R, D)
    # normalizing vectors
    embed_query = F.normalize(embed_query, p=2, dim=-1)
    embed_doc = F.normalize(embed_doc, p=2, dim=-1)
    ############## For Contextualized Representation using ELMO ##############
    query_ids = kargs[KeyWordSettings.QueryIDs]  # (B, )
    doc_ids = kargs[KeyWordSettings.DocIDs]  # (B, )
    assert query_ids.shape == doc_ids.shape
    use_cuda = kargs[KeyWordSettings.UseCuda]
    query_char_repr = self.left_elmo_tensor[query_ids]
    doc_char_repr = self.right_elmo_tensor[doc_ids]
    # I have to load to gpu at this step because left_tensor is too large
    # to load to GPU as a whole
    query_char_repr = torch_utils.gpu(query_char_repr, use_cuda)  # (B, L, D1)
    doc_char_repr = torch_utils.gpu(doc_char_repr, use_cuda)  # (B, R, D1)
    assert query_char_repr.size(1) == embed_query.size(1)
    assert doc_char_repr.size(1) == embed_doc.size(1)
    ##########################################################################
    q_convs, d_convs = [], []
    q_ctx_convs, d_ctx_convs = [], []
    # one (lexical, contextual) conv pair per n-gram size
    for q_conv, d_conv, \
        q_context_conv, d_context_conv in zip(self.q_convs, self.d_convs,
                                              self.q_context_convs,
                                              self.d_context_convs):
        q_out = q_conv(embed_query).transpose(
            1, 2)  # to shape (B, D, L) => (B, F, L) => (B, L, F)
        d_out = d_conv(embed_doc).transpose(
            1, 2)  # to shape (B, D, R) => (B, F, R) => (B, R, F)
        q_out = F.normalize(q_out, p=2, dim=-1)  # good stuff for relevance matching
        d_out = F.normalize(d_out, p=2, dim=-1)
        q_convs.append(q_out)
        d_convs.append(d_out)
        q_ctx_out = q_context_conv(query_char_repr).transpose(1, 2)  # B, L, F
        d_ctx_out = d_context_conv(doc_char_repr).transpose(1, 2)  # B, R, F
        q_ctx_out = F.normalize(q_ctx_out, p=2, dim=-1)
        d_ctx_out = F.normalize(d_ctx_out, p=2, dim=-1)
        q_ctx_convs.append(q_ctx_out)
        d_ctx_convs.append(d_ctx_out)
    output_phis = []
    for idx in range(self.max_ngram):
        query_local_context = q_ctx_convs[idx]  # (B, L, D)
        doc_local_context = d_ctx_convs[idx]  # (B, R, D)
        sim_mat = self._get_sim_matrix(q_convs[idx], d_convs[idx])
        sim_mat = sim_mat * tensor_mask
        if self.attention_type == AttentionType.UsingDotProductOnly:
            # using sim_mat, context_mat, sim_mat - context_mat,
            # sim_mat * context_mat: [S, L, S - L, S * L]
            context_aware_mat = self._get_sim_matrix(
                query_local_context, doc_local_context) * tensor_mask
            tensors = torch.stack([
                sim_mat, context_aware_mat, sim_mat - context_aware_mat,
                sim_mat * context_aware_mat
            ], dim=-1)  # B, L, R, C
        elif self.attention_type == AttentionType.UsingDotProductDisim:
            # using sim_mat, context_mat, sim_mat - context_mat,
            # dissimilarity * sim_mat: [S, L, S - L, S * D]
            context_aware_mat = self._get_sim_matrix(
                query_local_context, doc_local_context) * tensor_mask
            dissimilarity = self._get_disimilarity_mat(
                query_local_context, doc_local_context, tensor_mask,
                self.use_average_dcompositional_att) * tensor_mask
            tensors = torch.stack([
                sim_mat, context_aware_mat, sim_mat - context_aware_mat,
                sim_mat * dissimilarity
            ], dim=-1)  # B, L, R, C
        elif self.attention_type == AttentionType.UsingBilinearOnly:
            # [S, B, S - B, S * B]
            bilinear = self._get_bilinear_attention(
                query_local_context, doc_local_context) * tensor_mask
            tensors = torch.stack([
                sim_mat, bilinear, sim_mat - bilinear, bilinear * sim_mat
            ], dim=-1)  # B, L, R, C
        elif self.attention_type == AttentionType.UsingBilinearDissim:
            # [S, B, S - B, S * D]
            bilinear = self._get_bilinear_attention(
                query_local_context, doc_local_context) * tensor_mask
            dissimilarity = self._get_disimilarity_mat(
                query_local_context, doc_local_context, tensor_mask,
                self.use_average_dcompositional_att) * tensor_mask
            tensors = torch.stack([
                sim_mat, bilinear, sim_mat - bilinear, dissimilarity * sim_mat
            ], dim=-1)  # B, L, R, C
        tensors = tensors.permute(0, 3, 1, 2)  # (B, C, L, R)
        phi = torch.flatten(self.head_conv_layers[0](tensors), start_dim=1)
        output_phis.append(phi)
    phi = torch.cat(output_phis, dim=-1)  # (B, x)
    if self.use_visual:
        # a list of size B, where each element is a list of image tensors
        t1 = time.time()
        query_images_indices = kargs[KeyWordSettings.QueryImagesIndices]
        B1, n1, M1 = query_images_indices.shape  # expected shape
        assert n1 == 1
        query_images = self.full_left_images_tensor[
            query_images_indices.flatten().long()]  # B1 * n1 * M1, VD
        doc_imgs_indices = kargs[
            KeyWordSettings.DocImagesIndices]  # (B, n, M2, VD) or (B, M2, VD)
        B, n, M2 = doc_imgs_indices.shape  # expected shape
        images_mask = torch_utils.create_mask_tensor_image(
            query_images_indices, doc_imgs_indices)  # (B, n, M1, M2)
        doc_images = self.full_right_images_tensor[
            doc_imgs_indices.flatten().long()]  # B * n * M2, VD
        left_feats = self.image_fc1(
            query_images
        )  # (B * n1 * M1, H) we don't want visual_cnn on 30 duplicated queries images (not wise)
        right_feats = self.image_fc1(doc_images)  # (B * n * M2, H)
        left_feats = left_feats.view(B1, M1, self.last_visual_size)
        if B1 == 1:
            left_feats = left_feats.expand(
                B, M1, self.last_visual_size)  # during testing
        right_feats = right_feats.view(B, n * M2, self.last_visual_size)
        right_feats = F.normalize(right_feats, p=2, dim=-1)
        left_feats = F.normalize(left_feats, p=2, dim=-1)
        scores = torch.bmm(left_feats,
                           right_feats.permute(0, 2, 1))  # (B, M1, n * M2)
        scores = scores.view(B, M1, n, M2).permute(0, 2, 1, 3)  # (B, n, M1, M2)
        # masking
        assert scores.size() == images_mask.size(), (scores.size(),
                                                     images_mask.size())
        scores = scores * images_mask
        scores = scores.view(B * n, M1, M2)
        # best image-pair score per (query, doc)
        visual_scores, _ = torch.flatten(scores, start_dim=1).max(-1)
        visual_scores = visual_scores.unsqueeze(-1)  # (B * n, 1)
        phi = torch.cat([phi, visual_scores], dim=-1)
        t2 = time.time()
        # print("Running time of CNN in forward: ", (t2 - t1), "seconds")
    out = self.linear(phi)
    if verbose:
        print("out: ", out.squeeze())
    # print("After dense and tanh: ", out)
    if KeyWordSettings.OutputRankingKey in kargs and kargs[
            KeyWordSettings.OutputRankingKey] and self.use_visual:
        return torch.cat([out, torch.flatten(scores, start_dim=1)],
                         dim=-1)  # for error analysis (B, 2)
    return out.squeeze()
def fit(
        self,
        train_iteractions: interactions.MatchInteraction,
        verbose=True,  # for printing out evaluation during training
        topN=10,
        val_interactions: interactions.MatchInteraction = None,
        test_interactions: interactions.MatchInteraction = None):
    """Fit the model.

    Trains with negative sampling, validates every epoch (when ``verbose``),
    periodically evaluates on test, checkpoints the best validation model,
    and early-stops after ``self._early_stopping_patience`` stale epochs.

    Parameters
    ----------
    train_iteractions: :class:`matchzoo.DataPack`
        The input sequence dataset.
    val_interactions: :class:`matchzoo.DataPack`
    test_interactions: :class:`matchzoo.DataPack`
    """
    self._initialize(train_iteractions)
    best_hit, best_ndcg, best_epoch, test_ndcg, test_hit = 0, 0, 0, 0, 0
    test_results_dict = None
    iteration_counter = 0
    count_patience_epochs = 0
    for epoch_num in range(self._n_iter):
        # ------ Move to here ----------------------------------- #
        self._net.train(True)  # (re)enable training mode each epoch
        query_ids, left_contents, left_lengths, \
        doc_ids, right_contents, right_lengths, \
        neg_docs_ids, neg_docs_contents, neg_docs_lens = self._sampler.get_train_instances(train_iteractions, self._num_negative_samples)
        # reshuffle all aligned arrays together
        queries, query_content, query_lengths, \
        docs, doc_content, doc_lengths, \
        neg_docs, neg_docs_contents, neg_docs_lens = my_utils.shuffle(query_ids, left_contents, left_lengths,
                                                                      doc_ids, right_contents, right_lengths,
                                                                      neg_docs_ids, neg_docs_contents, neg_docs_lens)
        epoch_loss, total_pairs = 0.0, 0
        t1 = time.time()
        for (minibatch_num,
             (batch_query, batch_query_content, batch_query_len, batch_doc,
              batch_doc_content, batch_docs_lens, batch_neg_docs,
              batch_neg_doc_content, batch_neg_docs_lens)) \
                in enumerate(my_utils.minibatch(queries, query_content, query_lengths,
                                                docs, doc_content, doc_lengths,
                                                neg_docs, neg_docs_contents, neg_docs_lens,
                                                batch_size=self._batch_size)):
            # add idf here...
            query_idfs = None
            if len(TFIDF.get_term_idf()) != 0:
                query_idf_dict = TFIDF.get_term_idf()
                query_idfs = [[
                    query_idf_dict.get(int(word_idx), 0.0)
                    for word_idx in row
                ] for row in batch_query_content]
                query_idfs = torch_utils.gpu(
                    torch.from_numpy(np.array(query_idfs)).float(),
                    self._use_cuda)
            batch_query = my_utils.gpu(torch.from_numpy(batch_query),
                                       self._use_cuda)
            batch_query_content = my_utils.gpu(
                torch.from_numpy(batch_query_content), self._use_cuda)
            batch_doc = my_utils.gpu(torch.from_numpy(batch_doc),
                                     self._use_cuda)
            batch_doc_content = my_utils.gpu(
                torch.from_numpy(batch_doc_content), self._use_cuda)
            batch_neg_doc_content = my_utils.gpu(
                torch.from_numpy(batch_neg_doc_content), self._use_cuda)
            total_pairs += self._batch_size * self._num_negative_samples
            self._optimizer.zero_grad()
            # NOTE(review): `loss` is only assigned for these loss names;
            # any other configured loss would hit an unbound `loss` below —
            # confirm the supported set.
            if self._loss in ["bpr", "hinge", "pce", "bce"]:
                loss = self._get_multiple_negative_predictions_normal(
                    batch_query, batch_query_content, batch_doc,
                    batch_doc_content, batch_neg_docs, batch_neg_doc_content,
                    batch_query_len, batch_docs_lens, batch_neg_docs_lens,
                    self._num_negative_samples, query_idf=query_idfs)
            epoch_loss += loss.item()
            iteration_counter += 1
            # if iteration_counter % 2 == 0: break
            TensorboardWrapper.mywriter().add_scalar(
                "loss/minibatch_loss", loss.item(), iteration_counter)
            loss.backward()
            self._optimizer.step()
        epoch_loss /= float(total_pairs)
        TensorboardWrapper.mywriter().add_scalar("loss/epoch_loss_avg",
                                                 epoch_loss, epoch_num)
        # print("Number of Minibatches: ", minibatch_num, "Avg. loss of epoch: ", epoch_loss)
        t2 = time.time()
        epoch_train_time = t2 - t1
        if verbose:  # validation after each epoch
            t1 = time.time()
            assert len(val_interactions.unique_queries_test
                       ) in KeyWordSettings.QueryCountVal, len(
                           val_interactions.unique_queries_test)
            result_val = self.evaluate(val_interactions, topN)
            hits = result_val["hits"]
            ndcg = result_val["ndcg"]
            t2 = time.time()
            valiation_time = t2 - t1
            # run the (more expensive) test evaluation only every
            # _testing_epochs epochs, and never at epoch 0
            if epoch_num and epoch_num % self._testing_epochs == 0:
                t1 = time.time()
                assert len(test_interactions.unique_queries_test
                           ) in KeyWordSettings.QueryCountTest
                result_test = self.evaluate(test_interactions, topN)
                hits_test = result_test["hits"]
                ndcg_test = result_test["ndcg"]
                t2 = time.time()
                testing_time = t2 - t1
                TensorboardWrapper.mywriter().add_scalar(
                    "hit/hit_test", hits_test, epoch_num)
                TensorboardWrapper.mywriter().add_scalar(
                    "ndcg/ndcg_test", ndcg_test, epoch_num)
                FileHandler.myprint(
                    '|Epoch %03d | Test hits@%d = %.5f | Test ndcg@%d = %.5f | Testing time: %04.1f(s)'
                    % (epoch_num, topN, hits_test, topN, ndcg_test,
                       testing_time))
            TensorboardWrapper.mywriter().add_scalar(
                "hit/hits_val", hits, epoch_num)
            TensorboardWrapper.mywriter().add_scalar(
                "ndcg/ndcg_val", ndcg, epoch_num)
            FileHandler.myprint(
                '|Epoch %03d | Train time: %04.1f(s) | Train loss: %.3f'
                '| Vad hits@%d = %.5f | Vad ndcg@%d = %.5f | Validation time: %04.1f(s)'
                % (epoch_num, epoch_train_time, epoch_loss, topN, hits,
                   topN, ndcg, valiation_time))
            # checkpoint on improved validation hits (ndcg breaks ties)
            if hits > best_hit or (hits == best_hit and ndcg > best_ndcg):
                # if (hits + ndcg) > (best_hit + best_ndcg):
                count_patience_epochs = 0
                with open(self.saved_model, "wb") as f:
                    torch.save(self._net.state_dict(), f)
                # test_results_dict = result_test
                best_hit, best_ndcg, best_epoch = hits, ndcg, epoch_num
                # test_hit, test_ndcg = hits_test, ndcg_test
            else:
                count_patience_epochs += 1
            if self._early_stopping_patience and count_patience_epochs > self._early_stopping_patience:
                FileHandler.myprint(
                    "Early Stopped due to no better performance in %s epochs"
                    % count_patience_epochs)
                break
        # guard against a collapsed/diverged model
        if np.isnan(epoch_loss) or epoch_loss == 0.0:
            raise ValueError(
                'Degenerate epoch loss: {}'.format(epoch_loss))
    FileHandler.myprint("Closing tensorboard")
    TensorboardWrapper.mywriter().close()
    FileHandler.myprint(
        'Best result: | vad hits@%d = %.5f | vad ndcg@%d = %.5f | epoch = %d'
        % (topN, best_hit, topN, best_ndcg, best_epoch))
    FileHandler.myprint_details(
        json.dumps(test_results_dict, sort_keys=True, indent=2))
def evaluate(self,
             testRatings: interactions.MatchInteraction,
             K: int,
             output_ranking=False,
             **kargs):
    """
    Rank every candidate document of each test query with the trained net and
    compute hit@K, ndcg@K and ndcg@1.

    I decided to move this function into Fitter class since different models
    have different ways to evaluate (i.e. different data sources to use).
    Therefore, it is needed to have seperate evaluation methods in each Fitter
    class. Furthermore, I notice that this function uses _use_cuda which is a
    property of Fitter class.

    Parameters
    ----------
    testRatings: container mapping each query to its candidate docs, labels
        and contents (``unique_queries_test``).
    K: cut-off for the ranked list.
    output_ranking: when True, also collect per-query ranked lists for error
        analysis and return them alongside the metrics.
    kargs: unused here; kept for a uniform evaluate() interface.

    Returns
    -------
    dict of metric name -> value (plus per-query metric lists); when
    ``output_ranking`` is True, a ``(results, error_analysis)`` tuple.
    """
    ndcg_metric = normalized_discounted_cumulative_gain.NormalizedDiscountedCumulativeGain
    hits, ndcgs = [], []
    ndcgs_at_1 = []
    list_error_analysis = []
    for query, candidates in testRatings.unique_queries_test.items():
        docs, labels, doc_contents, _ = candidates
        query_content = testRatings.dict_query_contents[query]
        # One query-length entry per candidate doc so lengths align row-wise.
        query_len = [testRatings.dict_query_lengths[query]] * len(labels)
        doc_lens = [testRatings.dict_doc_lengths[d] for d in docs]
        query_idfs = None
        # Optional idf weighting of query terms, only when idf statistics
        # were precomputed into the global TFIDF store.
        if len(TFIDF.get_term_idf()) > 0:
            query_idf_dict = TFIDF.get_term_idf()
            query_idfs = [
                query_idf_dict.get(int(word_idx), 0.0)
                for word_idx in query_content
            ]
            query_idfs = np.tile(query_idfs, (len(labels), 1))
            query_idfs = my_utils.gpu(
                torch.from_numpy(np.array(query_idfs)).float(),
                self._use_cuda)
        # Duplicate the single query row once per candidate doc so the net
        # scores all (query, doc) pairs in one batch.
        query_content = np.tile(
            query_content,
            (len(labels), 1))  # len(labels), query_contnt_leng)
        doc_contents = np.array(doc_contents)
        query_content = my_utils.gpu(query_content)
        doc_contents = my_utils.gpu(doc_contents)
        query_content = my_utils.gpu(
            my_utils.numpy2tensor(query_content, dtype=torch.int),
            self._use_cuda)
        doc_contents = my_utils.gpu(
            my_utils.numpy2tensor(doc_contents, dtype=torch.int),
            self._use_cuda)
        # Single forward pass over all candidates of this query.
        predictions = self._net.predict(query_content,
                                        doc_contents,
                                        query_lens=query_len,
                                        docs_lens=doc_lens,
                                        query_idf=query_idfs)
        ndcg_mz = ndcg_metric(K)(labels, predictions)
        ndcgs_at_1.append(ndcg_metric(1)(labels, predictions))
        ndcgs.append(ndcg_mz)
        positive_docs = set(
            [d for d, lab in zip(docs, labels) if lab == 1])
        indices = np.argsort(
            -predictions)[:K]  # indices of items with highest scores
        docs = np.array(docs)
        ranked_docs = docs[indices]
        if output_ranking:
            # Build a human-readable record of the top-K ranking for this
            # query, used for offline error analysis.
            labels = np.array(labels)
            ranked_labels = labels[indices]
            scores = predictions[indices]
            assert scores.shape == ranked_labels.shape
            ranked_doc_list = [{
                KeyWordSettings.Doc_cID: int(d),
                KeyWordSettings.Doc_cLabel: int(lab),
                KeyWordSettings.Doc_wImages: [],
                KeyWordSettings.Doc_wContent:
                testRatings.dict_doc_raw_contents[d],
                KeyWordSettings.Relevant_Score: float(score)
            } for d, lab, score in zip(ranked_docs, ranked_labels, scores)]
            q_details = {
                KeyWordSettings.Query_id: int(query),
                KeyWordSettings.Query_Images: [],
                KeyWordSettings.Ranked_Docs: ranked_doc_list,
                KeyWordSettings.Query_Content:
                testRatings.dict_query_raw_contents[query]
            }
            list_error_analysis.append(q_details)
        hit = my_evaluator.getHitRatioForList(ranked_docs, positive_docs)
        # ndcg_mine = getNDCGForList(ranklist, positive_docs)
        hits.append(hit)
        # assert abs(ndcg_mine - ndcg_mz) < 1e-10, (ndcg_mine, ndcg_mz)
    results = {}
    # nanmean: queries whose metric came back NaN do not poison the average.
    results["ndcg"] = np.nanmean(ndcgs)
    results["ndcg_list"] = ndcgs
    results["hits"] = np.nanmean(hits)
    results["hits_list"] = hits
    results["ndcg@1"] = np.nanmean(ndcgs_at_1)
    results["ndcg@1_list"] = ndcgs_at_1
    if output_ranking:
        # NOTE(review): assumes KeyWordSettings.Query_id == "qid" — verify.
        return results, sorted(list_error_analysis, key=lambda x: x["qid"])
    return results
def fit(self, interactions, verbose=True, topN = 10,
        vadRatings = None, vadNegatives = None,
        testRatings = None, testNegatives = None,
        adjNetwork = None,
        user_user_sppmi = None, item_item_sppmi = None,
        user_user_sim = None, item_item_sim = None,
        alpha_gau: float = None, gamma_gau: float = None, beta_gau: float = None):
    """
    Fit the model.

    Parameters
    ----------
    interactions: :class:`interactions.Interactions`
        The input sequence dataset.
    vadRatings: :class:`list[list[int]]`
    vadNegatives: :class:`list[list[int]]`
    testRatings: :class:`list[list[int]]`
    testNegatives: :class:`list[list[int]]`
        Negative samples of every pair of (user, item) in testRatings.
        shape (bs, 100) 100 negative samples
    adjNetwork: :class:`dict`
        adjacency information of the user network (key: userID).
    user_user_sppmi, item_item_sppmi:
        SPPMI matrices, row-indexable by user / item id respectively.
    user_user_sim, item_item_sim:
        similarity matrices, row-indexable by user / item id respectively.
    alpha_gau, gamma_gau, beta_gau:
        loss-weighting hyper-parameters forwarded to ``_get_loss``.
    """
    self._sampler.set_interactions(interactions)
    if not self._initialized():
        self._initialize(interactions)
    best_map, best_ndcg, best_epoch, test_ndcg, test_map = 0, 0, 0, 0, 0
    test_results_dict = None
    for epoch_num in range(self._n_iter):
        user_ids, item_ids, neg_items_ids = self._sampler.get_train_instances(
            interactions, self._num_negative_samples,
            random_state=self._random_state)
        self._check_input(user_ids, item_ids)
        users, items, neg_items = my_utils.shuffle(
            user_ids, item_ids, neg_items_ids,
            random_state=self._random_state)
        user_ids_tensor = my_utils.gpu(torch.from_numpy(users), self._use_cuda)
        item_ids_tensor = my_utils.gpu(torch.from_numpy(items), self._use_cuda)
        neg_item_ids_tensor = my_utils.gpu(torch.from_numpy(neg_items), self._use_cuda)
        self._check_shape(user_ids_tensor, item_ids_tensor,
                          neg_item_ids_tensor, self._num_negative_samples)
        epoch_loss = 0.0
        t1 = time.time()
        # Per-epoch "visited" sets: side information of each id is fed to the
        # loss at most once per epoch (see _select_user_user_sppmi_input).
        visited_users = set()
        visited_users_sppmi = set()
        visited_item_sppmi = set()
        visited_user_sim = set()
        visited_item_sim = set()
        for (minibatch_num,
             (batch_user, batch_item, batch_negatives)) in enumerate(
                 my_utils.minibatch(user_ids_tensor, item_ids_tensor,
                                    neg_item_ids_tensor,
                                    batch_size=self._batch_size)):
            # need to duplicate batch_user and batch_item
            network = self._prepare_network_input(batch_user, visited_users,
                                                  adjNetwork)
            user_user_sppmi_selected = self._select_user_user_sppmi_input(
                batch_user, visited_users_sppmi, user_user_sppmi)
            item_item_sppmi_selected = self._select_user_user_sppmi_input(
                batch_item, visited_item_sppmi, item_item_sppmi)
            # Bug fix: the user-user similarity matrix must be indexed by
            # user ids. The original passed batch_item here (copy-paste from
            # the item_item lines below).
            user_user_sim_selected = self._select_user_user_sppmi_input(
                batch_user, visited_user_sim, user_user_sim)
            item_item_sim_selected = self._select_user_user_sppmi_input(
                batch_item, visited_item_sim, item_item_sim)
            self._optimizer.zero_grad()
            loss = self._get_loss(batch_user, batch_item, network,
                                  user_user_sppmi_selected,
                                  item_item_sppmi_selected,
                                  user_user_sim_selected,
                                  item_item_sim_selected,
                                  alpha_gau, gamma_gau, beta_gau)
            epoch_loss += loss.item()
            loss.backward()
            self._optimizer.step()
        # Average loss over the number of minibatches seen this epoch.
        epoch_loss /= minibatch_num + 1
        t2 = time.time()
        epoch_train_time = t2 - t1
        if verbose:  # validation after each epoch
            t1 = time.time()
            result_val = self.evaluate(vadRatings, vadNegatives, topN)
            mapks = result_val["map"]
            ndcg = result_val["ndcg"]
            recall = result_val["recall"]
            result_test = self.evaluate(testRatings, testNegatives, topN)
            maps_test = result_test["map"]
            ndcg_test = result_test["ndcg"]
            recall_test = result_test["recall"]
            t2 = time.time()
            eval_time = t2 - t1
            self.output_handler.myprint(
                '|Epoch %d | Train time: %d (s) | Train loss: %.5f | Eval time: %.3f (s) '
                '| Vad mapks@%d = %.5f | Vad ndcg@%d = %.5f | Vad recall@%d = %.5f '
                '| Test mapks@%d = %.5f | Test ndcg@%d = %.5f | Test recall@%d = %.5f'
                % (epoch_num, epoch_train_time, epoch_loss, eval_time,
                   topN, mapks, topN, ndcg, topN, recall,
                   topN, maps_test, topN, ndcg_test, topN, recall_test))
            # Checkpoint whenever validation ndcg improves.
            if ndcg > best_ndcg:
                with open(self.saved_model, "wb") as f:
                    torch.save(self._net, f)
                test_results_dict = result_test
                best_map, best_ndcg, best_epoch = mapks, ndcg, epoch_num
                test_map, test_ndcg = maps_test, ndcg_test
        if np.isnan(epoch_loss) or epoch_loss == 0.0:
            raise ValueError('Degenerate epoch loss: {}'.format(epoch_loss))
    self.output_handler.myprint(
        'Best result: '
        '| vad precisions@%d = %.3f | vad ndcg@%d = %.3f '
        '| test precisions@%d = %.3f | test ndcg@%d = %.3f | epoch = %d'
        % (topN, best_map, topN, best_ndcg, topN, test_map, topN,
           test_ndcg, best_epoch))
    self.output_handler.myprint_details(
        json.dumps(test_results_dict, sort_keys = True, indent = 2))
def fit_models(args):
    """
    End-to-end driver: set up logging and seeds, load data (+ optional
    images/ELMo features), build the matching model and train it.

    Parameters
    ----------
    args: parsed command-line namespace; fields used include ``log``,
        ``seed``, ``cuda``, ``path``, ``dataset``, ``use_visual``,
        the ``fixed_length_*`` / model hyper-parameters, and file paths
        for mappings and precomputed features.
    """
    # ---- logging setup: one folder + result file per run, keyed by timestamp
    if not os.path.exists(args.log):
        os.mkdir(args.log)
    curr_date = datetime.datetime.now().timestamp()  # seconds
    # folder to store all outputed files of a run
    secondary_log_folder = os.path.join(args.log,
                                        "log_results_%s" % (int(curr_date)))
    if not os.path.exists(secondary_log_folder):
        os.mkdir(secondary_log_folder)
    logfolder_result = os.path.join(secondary_log_folder,
                                    "%s_result.txt" % int(curr_date))
    FileHandler.init_log_files(logfolder_result)
    settings = json.dumps(vars(args), sort_keys=True, indent=2)
    FileHandler.myprint("Running script " + str(os.path.realpath(__file__)))
    FileHandler.myprint(settings)
    FileHandler.myprint("Setting seed to " + str(args.seed))
    # ---- determinism: seed every RNG and disable cudnn autotuning
    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False
    if args.cuda:
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    # Invert the saved name->index maps so indices can be mapped back for
    # error analysis output.
    index2queries = dict(
        (y, x)
        for x, y in json.loads(open(args.query_mapped).read()).items())
    index2docs = dict(
        (y, x)
        for x, y in json.loads(open(args.article_mapped).read()).items())
    root = args.path
    # Re-ranking mode is inferred from the dataset path name.
    use_reranking = "reranking" in root
    t1 = time.time()
    # ---- data loading (ELMo contextual features + train/dev/test packs)
    elmo_queries_path = os.path.join(args.elmo_feats, "queries_feats.pth")
    elmo_docs_path = os.path.join(args.elmo_feats, "articles_feats.pth")
    elmo_loader = load_data.ElmoLoader(elmo_queries_path, elmo_docs_path,
                                       args.fixed_length_left,
                                       args.fixed_length_right)
    load_data_func = elmo_loader.elmo_load_data
    train_pack = load_data_func(root, 'train', prefix=args.dataset)
    valid_pack = load_data_func(root, 'dev', prefix=args.dataset)
    predict_pack = load_data_func(root, 'test', prefix=args.dataset)
    if use_reranking:
        FileHandler.myprint("Using Re-Ranking Dataset..........")
        predict2_hard_pack = load_data_func(root, 'test2_hard',
                                            prefix=args.dataset)
    # ---- corpus statistics (token-length extremes across the three splits)
    a = train_pack.left["text_left"].str.lower().str.split().apply(len).max()
    b = valid_pack.left["text_left"].str.lower().str.split().apply(len).max()
    c = predict_pack.left["text_left"].str.lower().str.split().apply(len).max()
    max_query_length = max([a, b, c])
    min_query_length = min([a, b, c])
    a = train_pack.right["text_right"].str.lower().str.split().apply(len).max()
    b = valid_pack.right["text_right"].str.lower().str.split().apply(len).max()
    c = predict_pack.right["text_right"].str.lower().str.split().apply(
        len).max()
    max_doc_length = max([a, b, c])
    min_doc_length = min([a, b, c])
    FileHandler.myprint("Min query length, " + str(min_query_length) +
                        " Min doc length " + str(min_doc_length))
    FileHandler.myprint("Max query length, " + str(max_query_length) +
                        " Max doc length " + str(max_doc_length))
    # ---- optional visual features: attach image tensors to every data pack
    if args.use_visual:
        image_loader = load_data.ImagesLoader(
            left_pth_file=args.left_images_features,
            max_num_left_images=args.n_img_in_query,
            right_pth_file=args.right_images_features,
            max_num_right_images=args.n_img_in_doc,
            use_cuda=args.cuda)
        data_packs = [train_pack, valid_pack, predict_pack]
        if use_reranking:
            data_packs.append(predict2_hard_pack)
        image_loader.fit(data_packs)  # memory-intensive (~10Gb RAM)
        train_pack = image_loader.transform(train_pack)
        valid_pack = image_loader.transform(valid_pack)
        predict_pack = image_loader.transform(predict_pack)
        if use_reranking:
            predict2_hard_pack = image_loader.transform(predict2_hard_pack)
        print(image_loader.left_tensor.size(),
              image_loader.right_tensor.size())
    # ---- text preprocessing and conversion to interaction objects
    preprocessor = mz.preprocessors.ElmoPreprocessor(args.fixed_length_left,
                                                     args.fixed_length_right)
    print('parsing data')
    train_processed = preprocessor.fit_transform(
        train_pack)  # This is a DataPack
    valid_processed = preprocessor.transform(valid_pack)
    predict_processed = preprocessor.transform(predict_pack)
    train_interactions = MatchInteractionVisual(train_processed)
    valid_interactions = MatchInteractionVisual(valid_processed)
    test_interactions = MatchInteractionVisual(predict_processed)
    if use_reranking:
        predict2_processed = preprocessor.transform(predict2_hard_pack)
        predict2_interactions = MatchInteractionVisual(predict2_processed)
    FileHandler.myprint('done extracting')
    t2 = time.time()
    FileHandler.myprint('loading data time: %d (seconds)' % (t2 - t1))
    FileHandler.myprint("Building model")
    # ---- word embeddings: GloVe rows, L2-normalized per word
    print("Loading word embeddings......")
    t1_emb = time.time()
    term_index = preprocessor.context['vocab_unit'].state['term_index']
    glove_embedding = mz.datasets.embeddings.load_glove_embedding(
        dimension=args.word_embedding_size, term_index=term_index)
    embedding_matrix = glove_embedding.build_matrix(term_index)
    l2_norm = np.sqrt((embedding_matrix * embedding_matrix).sum(axis=1))
    embedding_matrix = embedding_matrix / l2_norm[:, np.newaxis]
    t2_emb = time.time()
    print("Time to load word embeddings......", (t2_emb - t1_emb))
    # ---- model hyper-parameters
    match_params = {}
    match_params['embedding'] = embedding_matrix
    match_params["embedding_freeze"] = True  # freezing word embeddings
    match_params["fixed_length_left"] = args.fixed_length_left
    match_params["fixed_length_right"] = args.fixed_length_right
    match_params['dropout'] = 0.1
    match_params['filters'] = args.filters
    match_params["conv_layers"] = args.conv_layers
    match_params["filters_count_pacrr"] = args.filters_count_pacrr
    match_params["n_s"] = args.n_s
    match_params["max_ngram"] = args.max_ngram
    match_params["head_cnn_type"] = args.head_cnn_type
    match_params["use_visual"] = args.use_visual
    match_params[
        "use_average_dcompositional_att"] = args.use_average_dcompositional_att
    match_params["attention_type"] = args.attention_type
    # contextualized part
    match_params["left_elmo_tensor"] = elmo_loader.left_tensor_feats
    match_params["right_elmo_tensor"] = elmo_loader.right_tensor_feats
    match_params["elmo_vec_size"] = 1024
    if args.use_visual:
        match_params["visual_feature_size"] = image_loader.visual_features_size
        # Move the full image-feature tensors to GPU once, up front.
        image_loader.left_tensor = torch_utils.gpu(image_loader.left_tensor,
                                                   args.cuda)
        image_loader.right_tensor = torch_utils.gpu(image_loader.right_tensor,
                                                    args.cuda)
        match_params["full_left_images_tensor"] = image_loader.left_tensor
        match_params["full_right_images_tensor"] = image_loader.right_tensor
    match_model = multimodal_attention_network.MultiModalAttentionNetwork(
        match_params)
    FileHandler.myprint("Fitting Model")
    # ---- choose fitter flavor: multimodal vs text-only
    if args.use_visual:
        FileHandler.myprint("Using both Textual and Visual features.......")
        fit_model = fitter.VisualFitter(net=match_model,
                                        loss=args.loss_type,
                                        n_iter=args.epochs,
                                        batch_size=args.batch_size,
                                        learning_rate=args.lr,
                                        early_stopping=args.early_stopping,
                                        use_cuda=args.cuda,
                                        num_negative_samples=args.num_neg,
                                        logfolder=secondary_log_folder,
                                        curr_date=curr_date,
                                        use_visual=args.use_visual,
                                        image_loader=image_loader,
                                        index2queries=index2queries,
                                        index2docs=index2docs)
    else:
        FileHandler.myprint("Using Textual content only....")
        fit_model = contextualized_fitter.ContextualizedFitter(
            net=match_model,
            loss=args.loss_type,
            n_iter=args.epochs,
            batch_size=args.batch_size,
            learning_rate=args.lr,
            early_stopping=args.early_stopping,
            use_cuda=args.cuda,
            num_negative_samples=args.num_neg,
            logfolder=secondary_log_folder,
            curr_date=curr_date)
    # ---- train; Ctrl-C is treated as "stop early", not as a failure
    try:
        fit_model.fit(train_interactions,
                      verbose=True,
                      topN=args.topk,
                      val_interactions=valid_interactions,
                      test_interactions=test_interactions)
        fit_model.load_best_model(valid_interactions, test_interactions,
                                  topN=args.topk)
        if use_reranking:
            fit_model.load_best_model_test2_test3(predict2_interactions, None,
                                                  topN=args.topk)
    except KeyboardInterrupt:
        FileHandler.myprint('Exiting from training early')
    t10 = time.time()
    FileHandler.myprint('Total time: %d (seconds)' % (t10 - t1))
def fit(
        self,
        train_iteractions: interactions.MatchInteraction,
        verbose=True,  # for printing out evaluation during training
        val_interactions: interactions.MatchInteraction = None,
        test_interactions: interactions.MatchInteraction = None):
    """
    Fit the model.

    Trains for up to ``self._n_iter`` epochs, validating after each epoch,
    checkpointing on the best validation cross-entropy and early-stopping
    after ``self._early_stopping_patience`` epochs without improvement.

    Parameters
    ----------
    train_iteractions: :class:`matchzoo.DataPack`
        The input sequence dataset.
    val_interactions: :class:`matchzoo.DataPack`
    test_interactions: :class:`matchzoo.DataPack`
    """
    self._initialize()
    # Lower cross-entropy is better, so start "best" at a sentinel maximum.
    best_ce, best_epoch, test_ce = sys.maxsize, 0, 0
    test_results_dict = None
    iteration_counter = 0
    count_patience_epochs = 0
    for epoch_num in range(self._n_iter):
        # ------ Move to here ----------------------------------- #
        self._net.train(True)
        query_ids, left_contents, left_lengths, \
        doc_ids, right_contents, target_contents, right_lengths = self._sampler.get_instances(train_iteractions)
        # Shuffle all parallel arrays together so rows stay aligned.
        queries, query_content, query_lengths, \
        docs, doc_content, target_contents, doc_lengths = my_utils.shuffle(
            query_ids, left_contents, left_lengths,
            doc_ids, right_contents, target_contents, right_lengths)
        epoch_loss, total_pairs = 0.0, 0
        t1 = time.time()
        for (minibatch_num,
             (batch_query, batch_query_content, batch_query_len, batch_doc,
              batch_doc_content, batch_doc_target, batch_docs_lens)) \
                in enumerate(my_utils.minibatch(queries, query_content,
                                                query_lengths, docs,
                                                doc_content, target_contents,
                                                doc_lengths,
                                                batch_size = self._batch_size)):
            t3 = time.time()
            batch_query = my_utils.gpu(torch.from_numpy(batch_query),
                                       self._use_cuda)
            batch_query_content = my_utils.gpu(
                torch.from_numpy(batch_query_content), self._use_cuda)
            # batch_query_len = my_utils.gpu(torch.from_numpy(batch_query_len), self._use_cuda)
            batch_doc = my_utils.gpu(torch.from_numpy(batch_doc),
                                     self._use_cuda)
            batch_doc_content = my_utils.gpu(
                torch.from_numpy(batch_doc_content), self._use_cuda)
            batch_doc_target = my_utils.gpu(
                torch.from_numpy(batch_doc_target), self._use_cuda)
            # batch_docs_lens = my_utils.gpu(torch.from_numpy(batch_docs_lens), self._use_cuda)
            total_pairs += batch_query.size(0)  # (batch_size)
            self._optimizer.zero_grad()
            loss = self._get_loss(batch_query, batch_query_content,
                                  batch_doc, batch_doc_content,
                                  batch_query_len, batch_docs_lens,
                                  batch_doc_target)
            epoch_loss += loss.item()
            iteration_counter += 1
            # if iteration_counter % 2 == 0: break
            TensorboardWrapper.mywriter().add_scalar(
                "loss/minibatch_loss", loss.item(), iteration_counter)
            loss.backward()
            # Gradient clipping guards against exploding gradients.
            torch.nn.utils.clip_grad_norm_(self._net.parameters(),
                                           self._clip)
            self._optimizer.step()
            t4 = time.time()
            # if iteration_counter % 100 == 0: print("Running time for each mini-batch: ", (t4 - t3), "s")
        # Average loss per training pair (not per minibatch).
        epoch_loss /= float(total_pairs)
        TensorboardWrapper.mywriter().add_scalar("loss/epoch_loss_avg",
                                                 epoch_loss, epoch_num)
        # print("Number of Minibatches: ", minibatch_num, "Avg. loss of epoch: ", epoch_loss)
        t2 = time.time()
        epoch_train_time = t2 - t1
        if verbose:  # validation after each epoch
            t1 = time.time()
            result_val = self.evaluate(val_interactions)
            val_ce = result_val["cross_entropy"]
            t2 = time.time()
            validation_time = t2 - t1
            TensorboardWrapper.mywriter().add_scalar(
                "cross_entropy/val_ce", val_ce, epoch_num)
            FileHandler.myprint(
                '|Epoch %03d | Train time: %04.1f(s) | Train loss: %.3f'
                '| Val loss = %.5f | Validation time: %04.1f(s)' %
                (epoch_num, epoch_train_time, epoch_loss, val_ce,
                 validation_time))
            if val_ce < best_ce:
                # New best: reset patience and checkpoint the weights.
                count_patience_epochs = 0
                with open(self.saved_model, "wb") as f:
                    torch.save(self._net.state_dict(), f)
                # test_results_dict = result_test
                best_ce, best_epoch = val_ce, epoch_num
            else:
                count_patience_epochs += 1
                if self._early_stopping_patience and count_patience_epochs > self._early_stopping_patience:
                    FileHandler.myprint(
                        "Early Stopped due to no better performance in %s epochs"
                        % count_patience_epochs)
                    break
        if np.isnan(epoch_loss) or epoch_loss == 0.0:
            raise ValueError(
                'Degenerate epoch loss: {}'.format(epoch_loss))
    FileHandler.myprint("Closing tensorboard")
    TensorboardWrapper.mywriter().close()
    FileHandler.myprint(
        'Best result: | vad cross_entropy = %.5f | epoch = %d' %
        (best_ce, best_epoch))
    # NOTE(review): test_results_dict is never assigned in this method (the
    # assignment above is commented out), so this prints "null" — verify.
    FileHandler.myprint_details(
        json.dumps(test_results_dict, sort_keys=True, indent=2))
def _initialize(self):
    """Move the network onto the target device (GPU when enabled) and
    create the Adam optimizer over its parameters."""
    net_on_device = my_utils.gpu(self._net, self._use_cuda)
    self._net = net_on_device
    self._optimizer = optim.Adam(net_on_device.parameters(),
                                 lr=self._learning_rate)
def evaluate(self, testRatings: interactions.MatchInteractionVisual, K: int,
             output_ranking = False, **kargs):
    """
    Rank every candidate document of each test query (optionally including
    visual/image similarity signals) and compute hit@K, ndcg@K and ndcg@1.

    I decided to move this function into Fitter class since different models
    have different ways to evaluate (i.e. different data sources to use).
    Therefore, it is needed to have seperate evaluation methods in each Fitter
    class. Furthermore, I notice that this function uses _use_cuda which is a
    property of Fitter class.

    Parameters
    ----------
    testRatings: container of queries, candidate docs, labels, contents and
        image indices.
    K: cut-off for the ranked list.
    output_ranking: when True, also collect per-query ranked lists (with
        per-image visual scores) for error analysis.
        NOTE(review): the output_ranking branch reads ``doc_images``, which
        is only defined when ``self.use_visual`` is True — confirm callers
        only pass output_ranking=True in visual mode.
    kargs: unused here; kept for a uniform evaluate() interface.

    Returns
    -------
    dict of metric name -> value (plus per-query lists); when
    ``output_ranking`` is True, a ``(results, error_analysis)`` tuple.
    """
    ndcg_metric = normalized_discounted_cumulative_gain.NormalizedDiscountedCumulativeGain
    hits, ndcgs = [], []
    ndcgs_at_1 = []
    list_error_analysis = []
    for query, candidates in tqdm(testRatings.unique_queries_test.items()):
        t3 = time.time()
        docs, labels, doc_contents, _ = candidates
        query_content = testRatings.dict_query_contents[query]
        query_images_indices = testRatings.dict_query_imgages[query]
        # One query-length entry per candidate doc so lengths align row-wise.
        query_len = [testRatings.dict_query_lengths[query]] * len(labels)
        doc_lens = [testRatings.dict_doc_lengths[d] for d in docs]
        doc_images_indices = [testRatings.dict_doc_imgages[d] for d in docs]
        # Extra inputs for the net's predict() are collected in one dict.
        additional_data = {}
        additional_data[KeyWordSettings.Query_lens] = query_len
        additional_data[KeyWordSettings.Doc_lens] = doc_lens
        # Optional idf weighting of query terms.
        if len(TFIDF.get_term_idf()) > 0:
            query_idf_dict = TFIDF.get_term_idf()
            query_idfs = [query_idf_dict.get(int(word_idx), 0.0)
                          for word_idx in query_content]
            query_idfs = np.tile(query_idfs, (len(labels), 1))
            query_idfs = my_utils.gpu(
                torch.from_numpy(np.array(query_idfs)).float(),
                self._use_cuda)
            additional_data[KeyWordSettings.Query_Idf] = query_idfs
        if self.use_visual:
            t1 = time.time()
            query_images_indices = np.array(query_images_indices)
            # expected 1-D: one index per query image
            assert query_images_indices.shape == (len(query_images_indices), )
            query_images = query_images_indices.reshape(
                1, 1, len(query_images_indices))
            doc_images = np.array(doc_images_indices)
            query_images = torch_utils.gpu(torch.from_numpy(query_images),
                                           self._use_cuda)
            doc_images = torch_utils.gpu(torch.from_numpy(doc_images),
                                         self._use_cuda)
            additional_data[
                KeyWordSettings.QueryImagesIndices] = query_images  # (1, 1, M1)
            additional_data[
                KeyWordSettings.DocImagesIndices] = doc_images.unsqueeze(
                    1)  # (B, 1, M2)
            t2 = time.time()
            # print("Loading time images to gpu of validation: ", t2 - t1, "seconds")
        if output_ranking:
            additional_data[
                KeyWordSettings.OutputRankingKey] = True  # for error analysis
        # Duplicate the query row once per candidate so the net scores all
        # (query, doc) pairs in one batch.
        query_content = np.tile(
            query_content,
            (len(labels), 1))  # len(labels), query_contnt_leng)
        doc_contents = np.array(doc_contents)
        query_content = my_utils.gpu(query_content)
        doc_contents = my_utils.gpu(doc_contents)
        query_content = my_utils.gpu(
            my_utils.numpy2tensor(query_content, dtype=torch.int),
            self._use_cuda)
        doc_contents = my_utils.gpu(
            my_utils.numpy2tensor(doc_contents, dtype=torch.int),
            self._use_cuda)
        additional_data[KeyWordSettings.QueryIDs] = np.array(
            [query] * len(labels))
        additional_data[KeyWordSettings.DocIDs] = np.array(docs)
        additional_data[KeyWordSettings.UseCuda] = self._use_cuda
        predictions = self._net.predict(query_content, doc_contents,
                                        **additional_data)
        if output_ranking:
            # In ranking-output mode the net returns, per doc, one overall
            # score followed by M1*M2 pairwise image similarities; split
            # them apart here.
            assert len(predictions.shape) == 1
            _, M2 = doc_images.shape
            predictions = predictions.reshape(
                len(doc_lens), 1 + (len(query_images_indices) * M2))
            predictions, visual_sims = predictions[:, 0], predictions[:, 1:]
            visual_sims = visual_sims.reshape(len(doc_lens),
                                              len(query_images_indices), M2)
            visual_sims = visual_sims.transpose(0, 2, 1)  # (B, M2, M1)
        t4 = time.time()
        # print("Computing time of each query: ", (t4 - t3), "seconds")
        ndcg_mz = ndcg_metric(K)(labels, predictions)
        ndcgs_at_1.append(ndcg_metric(1)(labels, predictions))
        ndcgs.append(ndcg_mz)
        positive_docs = set(
            [d for d, lab in zip(docs, labels) if lab == 1])
        indices = np.argsort(
            -predictions)[:K]  # indices of items with highest scores
        docs = np.array(docs)
        ranked_docs = docs[indices]
        if output_ranking:
            labels = np.array(labels)
            ranked_labels = labels[indices]
            scores = predictions[indices]
            visual_scores = visual_sims[indices]  # (B, M2 * M1)) due to transpose
            assert scores.shape == ranked_labels.shape
            # One record per ranked doc: id, URL, label, per-image visual
            # scores (image path + score string), raw content, overall score.
            ranked_doc_list = [{
                KeyWordSettings.Doc_cID: int(d),
                KeyWordSettings.Doc_URL: self.index2docs[int(d)],
                KeyWordSettings.Doc_cLabel: int(lab),
                KeyWordSettings.Doc_wImages: [
                    "%s %s" % (x, str(y)) for x, y in zip(
                        list(map(self.image_loader.right_img_index2path.get,
                                 testRatings.dict_doc_imgages[d])),
                        visual_score.tolist())
                ],
                KeyWordSettings.Doc_wContent:
                testRatings.dict_doc_raw_contents[d],
                KeyWordSettings.Relevant_Score: float(score)
            } for d, lab, score, visual_score in zip(
                ranked_docs, ranked_labels, scores, visual_scores)]
            q_details = {
                KeyWordSettings.Query_id: int(query),
                KeyWordSettings.Query_TweetID:
                "http://twitter.com/user/status/" +
                self.index2queries[int(query)],
                KeyWordSettings.Query_Images:
                list(map(self.image_loader.left_img_index2path.get,
                         query_images_indices)),
                KeyWordSettings.Ranked_Docs: ranked_doc_list,
                KeyWordSettings.Query_Content:
                testRatings.dict_query_raw_contents[query]
            }
            list_error_analysis.append(q_details)
        hit = my_evaluator.getHitRatioForList(ranked_docs, positive_docs)
        # ndcg_mine = getNDCGForList(ranklist, positive_docs)
        hits.append(hit)
        # assert abs(ndcg_mine - ndcg_mz) < 1e-10, (ndcg_mine, ndcg_mz)
    results = {}
    # nanmean: queries whose metric came back NaN do not poison the average.
    results["ndcg"] = np.nanmean(ndcgs)
    results["ndcg_list"] = ndcgs
    results["hits"] = np.nanmean(hits)
    results["hits_list"] = hits
    results["ndcg@1"] = np.nanmean(ndcgs_at_1)
    results["ndcg@1_list"] = ndcgs_at_1
    if output_ranking:
        # NOTE(review): assumes KeyWordSettings.Query_id == "qid" — verify.
        return results, sorted(list_error_analysis, key=lambda x: x["qid"])
    return results
def evaluate(model: BaseModel,
             testRatings: interactions.MatchInteraction,
             K: int,
             _use_cuda,
             output_ranking=False):
    """
    Module-level variant of evaluate(): rank every candidate document of
    each test query with ``model`` and compute hit@K and ndcg@K.

    We could extend it to add more metrics in the future

    Parameters
    ----------
    model: a fitter (not wise)
    testRatings: the
    K: top k ranked documents
    _use_cuda: whether tensors should be moved to GPU.
    output_ranking: output the ranked docs with respect to a query for
        error analysis

    Returns
    -------
    dict of metric name -> value (plus per-query lists); when
    ``output_ranking`` is True, a ``(results, error_analysis)`` tuple.
    """
    ndcg_metric = normalized_discounted_cumulative_gain.NormalizedDiscountedCumulativeGain
    hits, ndcgs = [], []
    list_error_analysis = []
    for query, candidates in testRatings.unique_queries_test.items():
        docs, labels, doc_contents, _ = candidates
        query_content = testRatings.dict_query_contents[query]
        # One query-length entry per candidate doc so lengths align row-wise.
        query_len = [testRatings.dict_query_lengths[query]] * len(labels)
        doc_lens = [testRatings.dict_doc_lengths[d] for d in docs]
        query_idfs = None
        # Optional idf weighting of query terms.
        if len(TFIDF.get_term_idf()) > 0:
            query_idf_dict = TFIDF.get_term_idf()
            query_idfs = [
                query_idf_dict.get(int(word_idx), 0.0)
                for word_idx in query_content
            ]
            query_idfs = np.tile(query_idfs, (len(labels), 1))
            query_idfs = my_utils.gpu(
                torch.from_numpy(np.array(query_idfs)).float(), _use_cuda)
        # Duplicate the query row once per candidate so the model scores all
        # (query, doc) pairs in one batch.
        query_content = np.tile(
            query_content,
            (len(labels), 1))  # len(labels), query_contnt_leng)
        doc_contents = np.array(doc_contents)
        query_content = my_utils.gpu(query_content)
        doc_contents = my_utils.gpu(doc_contents)
        query_content = my_utils.gpu(
            my_utils.numpy2tensor(query_content, dtype=torch.int),
            _use_cuda)
        doc_contents = my_utils.gpu(
            my_utils.numpy2tensor(doc_contents, dtype=torch.int),
            _use_cuda)
        predictions = model.predict(query_content,
                                    doc_contents,
                                    query_lens=query_len,
                                    docs_lens=doc_lens,
                                    query_idf=query_idfs)
        ndcg_mz = ndcg_metric(K)(labels, predictions)
        ndcgs.append(ndcg_mz)
        positive_docs = set([d for d, lab in zip(docs, labels) if lab == 1])
        indices = np.argsort(
            -predictions)[:K]  # indices of items with highest scores
        docs = np.array(docs)
        ranked_docs = docs[indices]
        if output_ranking:
            # Human-readable record of the top-K ranking for error analysis.
            labels = np.array(labels)
            ranked_labels = labels[indices]
            scores = predictions[indices]
            assert scores.shape == ranked_labels.shape
            ranked_doc_list = [{
                KeyWordSettings.Doc_cID: int(d),
                KeyWordSettings.Doc_cLabel: int(lab),
                KeyWordSettings.Doc_wImages: [],
                KeyWordSettings.Doc_wContent:
                testRatings.dict_doc_raw_contents[d],
                KeyWordSettings.Relevant_Score: float(score)
            } for d, lab, score in zip(ranked_docs, ranked_labels, scores)]
            q_details = {
                KeyWordSettings.Query_id: int(query),
                KeyWordSettings.Query_Images: [],
                KeyWordSettings.Ranked_Docs: ranked_doc_list,
                KeyWordSettings.Query_Content:
                testRatings.dict_query_raw_contents[query]
            }
            list_error_analysis.append(q_details)
        hit = getHitRatioForList(ranked_docs, positive_docs)
        hits.append(hit)
        # assert abs(ndcg_mine - ndcg_mz) < 1e-10, (ndcg_mine, ndcg_mz)
    results = {}
    # nanmean: queries whose metric came back NaN do not poison the average.
    results["ndcg"] = np.nanmean(ndcgs)
    results["ndcg_list"] = ndcgs
    results["hits"] = np.nanmean(hits)
    results["hits_list"] = hits
    if output_ranking:
        # NOTE(review): assumes KeyWordSettings.Query_id == "qid" — verify.
        return results, sorted(list_error_analysis, key=lambda x: x["qid"])
    return results