Code example #1
import pickle

import numpy as np
import torch
import pydash as _


def query():
    # Learned per-term weights saved from a previous run
    w = pickle.load(open('weights_from_query.pkl', 'rb')).squeeze()
    topk_vals, topk_idxs = torch.topk(w, 30)
    bottomk_vals, bottomk_idxs = torch.topk(-w, 30)
    # Parsed Robust queries plus the token -> index lookup built for them
    docs, lookup = pickle.load(open('parsed_robust_queries.pkl', 'rb'))
    # count_me and get_glove_lookup are project helpers:
    # term statistics (tf/df/idf) and a token -> GloVe vector dict
    tf, df, idf = count_me(docs)
    inv_lookup = _.invert(lookup)
    print('Top30: ', [inv_lookup[idx] for idx in topk_idxs.tolist()])
    print('Bottom30: ', [inv_lookup[idx] for idx in bottomk_idxs.tolist()])
    glove = get_glove_lookup()
    # Re-key GloVe vectors by vocabulary index, folding OOV tokens into '<unk>'
    glove_by_idx = _.map_keys(
        glove, lambda vec, token: lookup[token]
        if token in lookup else lookup['<unk>'])
    norms_by_idx = _.map_values(glove_by_idx, torch.norm)
    idxs_in_order = list(norms_by_idx.keys())
    idfs_in_order = torch.tensor([idf[idx] for idx in idxs_in_order])
    dfs_in_order = torch.tensor([df[idx] for idx in idxs_in_order])
    tfs_in_order = torch.tensor([tf[idx] for idx in idxs_in_order])
    norms_in_order = torch.tensor([norms_by_idx[idx] for idx in idxs_in_order])
    w_subset = w[torch.tensor(idxs_in_order)]
    # Correlate the learned weights with tf, df, idf and embedding norm,
    # on both raw and log scales
    print(np.corrcoef(w_subset, tfs_in_order)[0, 1])
    print(np.corrcoef(w_subset, dfs_in_order)[0, 1])
    print(np.corrcoef(w_subset, idfs_in_order)[0, 1])
    print(np.corrcoef(w_subset, norms_in_order)[0, 1])
    print(np.corrcoef(w_subset, np.log(tfs_in_order + 1))[0, 1])
    print(np.corrcoef(w_subset, np.log(dfs_in_order))[0, 1])
    print(np.corrcoef(w_subset, np.log(idfs_in_order))[0, 1])
    print(np.corrcoef(w_subset, np.log(norms_in_order + 1))[0, 1])
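The pydash step in this example is the re-keying of the GloVe dictionary from token strings to vocabulary indices with map_keys, followed by map_values to get per-index norms. Below is a minimal standalone sketch of just that step, using invented toy data (the real lookup and glove come from the project's pickles and helpers):

import torch
import pydash as _

# Toy stand-ins: token -> vocabulary index, token -> embedding vector
lookup = {'<unk>': 0, 'ranking': 1, 'query': 2}
glove = {'ranking': torch.randn(50), 'query': torch.randn(50), 'oov': torch.randn(50)}

# Re-key embeddings by vocabulary index; out-of-vocabulary tokens collapse to '<unk>'
glove_by_idx = _.map_keys(
    glove, lambda vec, token: lookup.get(token, lookup['<unk>']))
norms_by_idx = _.map_values(glove_by_idx, torch.norm)  # index -> embedding norm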
Code example #2
File: metrics.py  Project: dmh43/lm_ltr
def _check(self, batch_num=0):
    if self.dont_smooth:
        smooth = 0.0
        val_results = self.metrics_at_k(self.val_ranking_dataset, smooth)
    else:
        # Pick the smoothing value that maximizes validation performance
        smooth, val_results = self._find_best_smooth()
    train_results = self.metrics_at_k(self.train_ranking_dataset, smooth)
    test_results = self.metrics_at_k(self.test_ranking_dataset, smooth)
    test_results_no_smooth = self.metrics_at_k(self.test_ranking_dataset, 0.0)
    # Prefix each metric name with its split before logging everything
    # as one flat dictionary
    self.experiment.record_metrics(
        _.assign({},
                 _.map_keys(train_results,
                            lambda val, key: 'train_' + key),
                 _.map_keys(test_results, lambda val, key: 'test_' + key),
                 _.map_keys(test_results_no_smooth,
                            lambda val, key: 'test_no_smooth_' + key),
                 _.map_keys(val_results, lambda val, key: 'val_' + key)),
        batch_num)
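Here map_keys only prefixes each metric name with its split so that train, validation, and test results can be merged into one flat dictionary via _.assign. A hedged illustration of that prefixing pattern with made-up metric values:

import pydash as _

train_results = {'map': 0.31, 'ndcg': 0.45}
test_results = {'map': 0.28, 'ndcg': 0.41}

merged = _.assign({},
                  _.map_keys(train_results, lambda val, key: 'train_' + key),
                  _.map_keys(test_results, lambda val, key: 'test_' + key))
# merged == {'train_map': 0.31, 'train_ndcg': 0.45, 'test_map': 0.28, 'test_ndcg': 0.41}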
Code example #3
File: preprocessing.py  Project: dmh43/lm_ltr
def process_raw_candidates(query_name_to_id, queries, document_title_to_id,
                           query_names, raw_ranking_candidates):
    # Keep only the candidate lists for the requested query names
    ranking_candidates = _.pick(raw_ranking_candidates, query_names)
    # Unknown titles fall back to document id 0
    lookup_by_title = lambda title: document_title_to_id.get(title) or 0
    test_ranking_candidates = _.map_values(
        ranking_candidates,
        lambda candidate_names: _.map_(candidate_names, lookup_by_title))
    # Re-key each candidate list by its query's token ids, stringified
    # with the surrounding brackets stripped
    return _.map_keys(
        test_ranking_candidates, lambda ranking, query_name: str(queries[
            query_name_to_id[query_name]])[1:-1])
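In this function map_values converts every candidate title list into document ids, and map_keys then swaps each query name for the query's token-id string. A toy run of the title-to-id half with invented sample data:

import pydash as _

document_title_to_id = {'Alpha': 3, 'Beta': 7}
raw_ranking_candidates = {'q1': ['Alpha', 'Beta', 'Missing']}

# Unknown titles map to 0, mirroring lookup_by_title above
candidates_by_id = _.map_values(
    raw_ranking_candidates,
    lambda names: _.map_(names, lambda title: document_title_to_id.get(title) or 0))
# candidates_by_id == {'q1': [3, 7, 0]}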
Code example #4
from typing import Union
import pydash


# BibliographyEntryFactory is a test-data factory defined elsewhere in the project
def _from_bibliography_entry(
        bibliography_entry: Union[dict, None] = None) -> dict:
    if not bibliography_entry:
        bibliography_entry = BibliographyEntryFactory.build()
    # Rename 'id' to '_id'; all other keys pass through unchanged
    return pydash.map_keys(bibliography_entry, lambda _, key: "_id"
                           if key == "id" else key)
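The final example uses map_keys for a single key rename, turning id into _id while leaving every other field untouched. A minimal check of that behavior on a hand-written entry (BibliographyEntryFactory is project-specific and not needed for the rename itself):

import pydash

entry = {'id': 'RN123', 'author': 'Smith', 'pages': '1-10'}
renamed = pydash.map_keys(entry, lambda _, key: '_id' if key == 'id' else key)
assert renamed == {'_id': 'RN123', 'author': 'Smith', 'pages': '1-10'}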