def query():
    """Inspect learned query-term weights against corpus statistics.

    Loads a trained weight vector and the parsed Robust queries from local
    pickle files, prints the 30 highest- and 30 lowest-weighted tokens, then
    prints Pearson correlations between the weights and tf / df / idf /
    GloVe-norm statistics, both raw and log-transformed.

    NOTE(review): ``pickle.load`` is only safe here because both files are
    locally produced artifacts — never load untrusted pickles.
    """
    # Context managers close the file handles deterministically (the
    # original left them to the garbage collector).
    with open('weights_from_query.pkl', 'rb') as fh:
        w = pickle.load(fh).squeeze()
    # Only the indices are needed; the values returned by topk were unused.
    topk_idxs = torch.topk(w, 30).indices
    bottomk_idxs = torch.topk(-w, 30).indices
    with open('parsed_robust_queries.pkl', 'rb') as fh:
        docs, lookup = pickle.load(fh)
    tf, df, idf = count_me(docs)
    inv_lookup = _.invert(lookup)
    print('Top30: ', [inv_lookup[idx] for idx in topk_idxs.tolist()])
    print('Bottom30: ', [inv_lookup[idx] for idx in bottomk_idxs.tolist()])
    glove = get_glove_lookup()
    # NOTE(review): every out-of-vocabulary token collapses onto the single
    # '<unk>' index, so later OOV vectors overwrite earlier ones under the
    # same key — confirm this is intended.
    glove_by_idx = _.map_keys(
        glove,
        lambda vec, token: lookup[token] if token in lookup else lookup['<unk>'])
    norms_by_idx = _.map_values(glove_by_idx, torch.norm)
    idxs_in_order = list(norms_by_idx.keys())
    idfs_in_order = torch.tensor([idf[idx] for idx in idxs_in_order])
    dfs_in_order = torch.tensor([df[idx] for idx in idxs_in_order])
    tfs_in_order = torch.tensor([tf[idx] for idx in idxs_in_order])
    norms_in_order = torch.tensor([norms_by_idx[idx] for idx in idxs_in_order])
    w_subset = w[torch.tensor(idxs_in_order)]
    # Raw correlations, in the same order as the original prints.
    for stat in (tfs_in_order, dfs_in_order, idfs_in_order, norms_in_order):
        print(np.corrcoef(w_subset, stat)[0, 1])
    # Log-transformed correlations; the +1 guards against log(0) for term
    # counts and norms (df/idf are assumed strictly positive — TODO confirm).
    print(np.corrcoef(w_subset, np.log(tfs_in_order + 1))[0, 1])
    print(np.corrcoef(w_subset, np.log(dfs_in_order))[0, 1])
    print(np.corrcoef(w_subset, np.log(idfs_in_order))[0, 1])
    print(np.corrcoef(w_subset, np.log(norms_in_order + 1))[0, 1])
def _check(self, batch_num=0):
    """Evaluate ranking metrics on every split and record them.

    Picks a smoothing value (0.0 when ``self.dont_smooth`` is set, otherwise
    the best value found on the validation set), computes metrics on the
    train/val/test ranking datasets (plus an unsmoothed test run), and sends
    one flat dict of prefixed metric names to ``self.experiment``.
    """
    if self.dont_smooth:
        smooth = 0.0
        val_results = self.metrics_at_k(self.val_ranking_dataset, smooth)
    else:
        smooth, val_results = self._find_best_smooth()
    train_results = self.metrics_at_k(self.train_ranking_dataset, smooth)
    test_results = self.metrics_at_k(self.test_ranking_dataset, smooth)
    test_results_no_smooth = self.metrics_at_k(self.test_ranking_dataset, 0.0)
    # Flatten all four result dicts into one, prefixing each key by its
    # split name (same merge order as the original assign/map_keys chain).
    recorded = {}
    for prefix, results in (('train_', train_results),
                            ('test_', test_results),
                            ('test_no_smooth_', test_results_no_smooth),
                            ('val_', val_results)):
        for key, val in results.items():
            recorded[prefix + key] = val
    self.experiment.record_metrics(recorded, batch_num)
def process_raw_candidates(query_name_to_id, queries, document_title_to_id,
                           query_names, raw_ranking_candidates):
    """Re-key raw ranking candidates by query text with document ids.

    For each requested query name present in ``raw_ranking_candidates``,
    maps every candidate document title to its id (0 when the title is
    unknown) and keys the resulting list by the query's text with its first
    and last characters stripped.
    """
    def title_to_doc_id(title):
        # Unknown titles fall back to document id 0.
        return document_title_to_id.get(title) or 0

    keyed_by_query_text = {}
    for query_name in query_names:
        # Mirrors pydash's pick: names absent from the raw dict are skipped.
        if query_name not in raw_ranking_candidates:
            continue
        candidate_ids = [title_to_doc_id(title)
                         for title in raw_ranking_candidates[query_name]]
        # str(...)[1:-1] drops the surrounding characters of the rendered
        # query — presumably brackets/quotes from its repr; TODO confirm.
        query_text = str(queries[query_name_to_id[query_name]])[1:-1]
        keyed_by_query_text[query_text] = candidate_ids
    return keyed_by_query_text
def _from_bibliography_entry(
        bibliography_entry: Union[dict, None] = None) -> dict:
    """Return a bibliography entry with its ``id`` key renamed to ``_id``.

    When no entry is given (note: an empty dict is treated the same as
    ``None`` here), a fresh one is built via ``BibliographyEntryFactory``.
    All other keys and every value are passed through unchanged.
    """
    if not bibliography_entry:
        bibliography_entry = BibliographyEntryFactory.build()
    renamed = {}
    for key, value in bibliography_entry.items():
        renamed["_id" if key == "id" else key] = value
    return renamed