def __init__(self, ini_file):
    mamba.setup.Configuration.__init__(self, ini_file)
    sys.stdout.write('[INIT] Loading C/C++ module implementing ranked Pubmed search...')
    self.document_ranker = ranker.Ranker()
    for file in self.globals['mentions'].split(' '):
        self.document_ranker.read_mentions(file)
    sys.stdout.write('done\n')
def build_index():
    corpus_path = util.get_corpus_dir_path_from_args()
    preprocessor = preprocessing.Preprocessor(corpus_path)
    doc_to_terms: list[preprocessing.DocToTerms] = preprocessor.parse()

    indexer_ob = indexer.Indexer(doc_to_terms)
    inverted_index: dict[str, indexer.Posting] = indexer_ob.inverter_index()
    doc_id_name_index: dict[int, str] = indexer_ob.doc_id_to_doc_name_index()

    tf_idf_ranker = ranker.Ranker(inverted_index, doc_id_name_index)
    _tfidf = tf_idf_ranker.tfidf()

    print('Indexing completed..saving...')
    util.save_obj(doc_id_name_index, DOC_ID_NAME_INDEX_NAME)
    util.save_obj(inverted_index, INVERTED_INDEX_FILE_NAME)
    util.save_pandas_df_as_pickle(_tfidf, TFIDF_NAME_INDEX_NAME)
    print('Saved index for quick results for future queries')
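# The persistence helpers used above (util.save_obj and its counterpart) are not
# shown in this snippet. A minimal sketch, assuming they are thin pickle wrappers;
# the function names and the '.pkl' suffix are assumptions, not the project's API:
import pickle

def save_obj(obj, name):
    # Serialize an arbitrary Python object (e.g. the inverted-index dict) to disk.
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    # Read a previously saved object back into memory for query time.
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)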
    self.all_count = 0.0
    self.loss.zero_()
    self.rank.zero_()
    self.count = 0.0
    sys.stdout.flush()
    return


random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)

user = sim_user.SynUser()
ranker = ranker.Ranker()
behavior_model = NetSynUser(user.vocabSize + 1)
target_model = NetSynUser(user.vocabSize + 1)
triplet_loss = TripletLossIP(margin=args.triplet_margin)

# load pre-trained model
behavior_model.load_state_dict(
    torch.load(args.pretrained_model, map_location=lambda storage, loc: storage))
# load pre-trained model
target_model.load_state_dict(
    torch.load(args.pretrained_model, map_location=lambda storage, loc: storage))

if torch.cuda.is_available():
    behavior_model.cuda()
    target_model.cuda()
    triplet_loss.cuda()
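# TripletLossIP is only constructed above, not defined. A minimal sketch of a
# margin-based triplet loss using inner-product (IP) similarity, which is one
# plausible reading of that class name; the real implementation may differ:
import torch
import torch.nn as nn

class TripletLossIPSketch(nn.Module):
    def __init__(self, margin=0.1):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        # Inner-product similarity for anchor/positive and anchor/negative pairs.
        pos_sim = (anchor * positive).sum(dim=1)
        neg_sim = (anchor * negative).sum(dim=1)
        # Hinge on the similarity gap: positives should beat negatives by `margin`.
        return torch.clamp(self.margin - pos_sim + neg_sim, min=0.0).mean()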
def _on_readable(self):
    kcode = self._stdscr.getch()
    k = curses.keyname(kcode)
    if k == '^[':
        n = self._stdscr.getch()
        nk = curses.keyname(n)
        kcode = kcode << 8 | n
        k = "M-%s" % curses.keyname(n)
    if hasattr(self, '_keylog'):
        self._keylog.write('k=[%10s] kcode=[%s]\n' % (k, kcode))

    if k == 'KEY_UP' or k == '^P':
        self._selected_index -= 1
        self._clamp_selected_index()
        self._update_results()
    elif k == 'KEY_DOWN' or k == '^N':
        self._selected_index += 1
        self._clamp_selected_index()
        self._update_results()
    elif k == '^G':
        self.on_done(True)
        return
    elif kcode == ascii.NL:
        self.on_done(False)
        return
    elif k == 'KEY_BACKSPACE' or k == '^?':
        if self._filter_text_point > 0:
            before = self._filter_text[0:self._filter_text_point]
            after = self._filter_text[self._filter_text_point:]
            self._filter_text = "%s%s" % (before[:-1], after)
            self._filter_text_point -= 1
            self._update_filter_text()
    elif k == '^D':
        before = self._filter_text[0:self._filter_text_point]
        after = self._filter_text[self._filter_text_point:]
        self._filter_text = "%s%s" % (before, after[1:])
        self._update_filter_text()
    elif k == '^A':
        self._filter_text_point = 0
        self._update_filter_text()
    elif k == '^E':
        self._filter_text_point = len(self._filter_text)
        self._update_filter_text()
    elif k == '^B' or k == 'KEY_LEFT':
        self._filter_text_point -= 1
        self._filter_text_point = max(
            0, min(self._filter_text_point, len(self._filter_text)))
        self._update_filter_text()
    elif k == '^F' or k == 'KEY_RIGHT':
        self._filter_text_point += 1
        self._filter_text_point = max(
            0, min(self._filter_text_point, len(self._filter_text)))
        self._update_filter_text()
    elif k == 'M-b':
        wordstarts = ranker.Ranker().get_starts(self._filter_text)
        wordstarts.append(len(self._filter_text))
        candidates = []
        for start in wordstarts:
            if start < self._filter_text_point:
                candidates.append(start)
        if len(candidates):
            self._filter_text_point = candidates[-1]
        self._filter_text_point = max(
            0, min(self._filter_text_point, len(self._filter_text)))
        self._update_filter_text()
    elif k == 'M-f':
        wordstarts = ranker.Ranker().get_starts(self._filter_text)
        wordstarts.append(len(self._filter_text))
        candidates = []
        for start in wordstarts:
            if start > self._filter_text_point:
                candidates.append(start)
        if len(candidates):
            self._filter_text_point = candidates[0]
        self._filter_text_point = max(
            0, min(self._filter_text_point, len(self._filter_text)))
        self._update_filter_text()
    elif k == '^K':
        before = self._filter_text[0:self._filter_text_point]
        self._filter_text = before
        self._update_filter_text()
    elif k == '^R':
        self.on_reindex_clicked()
    else:
        if not (k.startswith('^') or k.startswith('KEY_') or k.startswith('M-')):
            before = self._filter_text[0:self._filter_text_point]
            after = self._filter_text[self._filter_text_point:]
            self._filter_text = "%s%s%s" % (before, k, after)
            self._filter_text_point += 1
            self._update_filter_text()
    self.set_filter_text(self._filter_text)
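# The M-b / M-f branches rely on ranker.Ranker().get_starts() to find word
# boundaries in the filter text. A minimal sketch of that behaviour, assuming it
# returns the index where each word begins (a stand-in, not the real method):
import re

def get_starts(text):
    # Indices at which a run of word characters starts, e.g. "foo_bar baz" -> [0, 8].
    return [m.start() for m in re.finditer(r'\w+', text)]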
        'tfidf': _tfidf,
        'inverted': _inverted_index,
        'did_name': _doc_id_name_index
    }


print('Loading...')
index = load_index()
print('Ready...(type exit to terminate)')
while True:
    query = input('what is the query?')
    # query = 'patient ARDS'
    if query == 'exit':
        break
    print('...')
    normalize_query: list[str] = preprocessing.query(query)
    tf_idf_ranker_q = ranker.Ranker(index['inverted'], index['did_name'])
    _tfidf_query = tf_idf_ranker_q.tfidf_query(normalize_query)
    document_results: list[tuple[int, float]] = ranker.top_10_relevant_documents(
        index['tfidf'], _tfidf_query)
    document_results = [{
        'document_name': index['did_name'][d_id[0]],
        'similarity_score': d_id[1]
    } for d_id in document_results]
    print('Matching documents for the query - ', query)
    util.print_result(document_results, util.get_corpus_dir_path_from_args())
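# ranker.top_10_relevant_documents is called but not defined in this snippet. A
# minimal sketch of one way it could work, assuming the tf-idf index is a pandas
# DataFrame with one column per document and the query is a term -> weight dict
# (both assumptions about this project's data layout, not its actual API):
import numpy as np
import pandas as pd

def top_10_relevant_documents_sketch(tfidf: pd.DataFrame, tfidf_query: dict):
    # Build a query vector aligned with the index's term rows (missing terms get 0).
    q = np.array([tfidf_query.get(term, 0.0) for term in tfidf.index])
    q_norm = np.linalg.norm(q) or 1.0
    scores = []
    for doc_id in tfidf.columns:
        d = tfidf[doc_id].to_numpy()
        d_norm = np.linalg.norm(d) or 1.0
        # Cosine similarity between the query and this document's tf-idf vector.
        scores.append((doc_id, float(np.dot(q, d) / (q_norm * d_norm))))
    # Return the ten highest-scoring (doc_id, similarity) pairs.
    return sorted(scores, key=lambda s: s[1], reverse=True)[:10]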