Example no. 1
    def __init__(self, ini_file):
        # Parse the INI file through the base class, then load the native ranker.
        mamba.setup.Configuration.__init__(self, ini_file)
        sys.stdout.write(
            '[INIT]  Loading C/C++ module implementing ranked PubMed search...'
        )
        self.document_ranker = ranker.Ranker()
        # 'mentions' holds a space-separated list of mention files to preload.
        for file in self.globals['mentions'].split(' '):
            self.document_ranker.read_mentions(file)
        sys.stdout.write('done\n')
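
The loop above splits the `mentions` setting on single spaces, which breaks if the INI value ever contains consecutive spaces. A minimal stand-alone sketch of a more forgiving loader; only `Ranker.read_mentions` and the space-separated setting come from the example, the rest is assumed:

def load_mention_files(document_ranker, mentions_setting):
    """Feed each listed mentions file into the ranker; returns the file count.

    split() with no argument tolerates runs of whitespace, unlike the
    split(' ') used in the snippet above.
    """
    files = mentions_setting.split()
    for path in files:
        document_ranker.read_mentions(path)  # Ranker API as used in Example no. 1
    return len(files)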
Example no. 2
def build_index():
    corpus_path = util.get_corpus_dir_path_from_args()
    preprocessor = preprocessing.Preprocessor(corpus_path)
    doc_to_terms: list[preprocessing.DocToTerms] = preprocessor.parse()

    indexer_ob = indexer.Indexer(doc_to_terms)
    inverted_index: dict[str, indexer.Posting] = indexer_ob.inverter_index()
    doc_id_name_index: dict[int, str] = indexer_ob.doc_id_to_doc_name_index()

    tf_idf_ranker = ranker.Ranker(inverted_index, doc_id_name_index)
    _tfidf = tf_idf_ranker.tfidf()

    print('Indexing completed... saving...')
    util.save_obj(doc_id_name_index, DOC_ID_NAME_INDEX_NAME)
    util.save_obj(inverted_index, INVERTED_INDEX_FILE_NAME)
    util.save_pandas_df_as_pickle(_tfidf, TFIDF_NAME_INDEX_NAME)
    print('Saved index for quick results for future queries')
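
Neither `Indexer` nor `Ranker.tfidf()` is shown here, so the sketch below only illustrates the kind of computation the `tfidf()` call likely performs: TF-IDF weights derived from an inverted index. The `(doc_id, term_freq)` tuples are an assumption standing in for the unseen `indexer.Posting` type:

import math

def tfidf_sketch(inverted_index: dict[str, list[tuple[int, int]]],
                 n_docs: int) -> dict[tuple[int, str], float]:
    """Toy TF-IDF over an inverted index of term -> [(doc_id, term_freq)]."""
    weights: dict[tuple[int, str], float] = {}
    for term, postings in inverted_index.items():
        idf = math.log(n_docs / len(postings))  # rarer terms weigh more
        for doc_id, tf in postings:
            weights[(doc_id, term)] = tf * idf
    return weights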
Example no. 3
        self.all_count = 0.0
        self.loss.zero_()
        self.rank.zero_()
        self.count = 0.0
        sys.stdout.flush()
        return


random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)


user = sim_user.SynUser()
ranker = ranker.Ranker()  # note: rebinds the module name 'ranker' to the instance

behavior_model = NetSynUser(user.vocabSize + 1)
target_model = NetSynUser(user.vocabSize + 1)
triplet_loss = TripletLossIP(margin=args.triplet_margin)
# load the same pre-trained weights into both the behavior and the target model
behavior_model.load_state_dict(torch.load(args.pretrained_model, map_location=lambda storage, loc: storage))
target_model.load_state_dict(torch.load(args.pretrained_model, map_location=lambda storage, loc: storage))

if torch.cuda.is_available():
    behavior_model.cuda()
    target_model.cuda()
    triplet_loss.cuda()
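
The script pairs a behavior model with a target model, but this excerpt never shows how the target is refreshed. A periodic hard update is one common choice in such setups; the sketch below is that pattern, not necessarily what this author's training loop does:

import torch

def sync_target(behavior: torch.nn.Module, target: torch.nn.Module) -> None:
    """Hard-copy the behavior network's weights into the target network."""
    target.load_state_dict(behavior.state_dict())
    target.eval()  # the target is used for inference only between syncs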

Example no. 4
    def _on_readable(self):
        kcode = self._stdscr.getch()
        k = curses.keyname(kcode)
        if k == '^[':
            n = self._stdscr.getch()
            nk = curses.keyname(n)
            kcode = kcode << 8 | n
            k = "M-%s" % nk  # meta-prefixed key, e.g. ESC b -> 'M-b'

        if hasattr(self, '_keylog'):
            self._keylog.write('k=[%10s] kcode=[%s]\n' % (k, kcode))

        if k == 'KEY_UP' or k == '^P':
            self._selected_index -= 1
            self._clamp_selected_index()
            self._update_results()
        elif k == 'KEY_DOWN' or k == '^N':
            self._selected_index += 1
            self._clamp_selected_index()
            self._update_results()
        elif k == '^G':
            self.on_done(True)
            return
        elif kcode == ascii.NL:
            self.on_done(False)
            return
        elif k == 'KEY_BACKSPACE' or k == '^?':
            if self._filter_text_point > 0:
                before = self._filter_text[0:self._filter_text_point]
                after = self._filter_text[self._filter_text_point:]
                self._filter_text = "%s%s" % (before[:-1], after)
                self._filter_text_point -= 1
                self._update_filter_text()
        elif k == '^D':
            before = self._filter_text[0:self._filter_text_point]
            after = self._filter_text[self._filter_text_point:]
            self._filter_text = "%s%s" % (before, after[1:])
            self._update_filter_text()
        elif k == '^A':
            self._filter_text_point = 0
            self._update_filter_text()
        elif k == '^E':
            self._filter_text_point = len(self._filter_text)
            self._update_filter_text()
        elif k == '^B' or k == 'KEY_LEFT':
            self._filter_text_point -= 1
            self._filter_text_point = max(
                0, min(self._filter_text_point, len(self._filter_text)))
            self._update_filter_text()
        elif k == '^F' or k == 'KEY_RIGHT':
            self._filter_text_point += 1
            self._filter_text_point = max(
                0, min(self._filter_text_point, len(self._filter_text)))
            self._update_filter_text()
        elif k == 'M-b':
            wordstarts = ranker.Ranker().get_starts(self._filter_text)
            wordstarts.append(len(self._filter_text))
            candidates = []
            for start in wordstarts:
                if start < self._filter_text_point:
                    candidates.append(start)
            if candidates:
                self._filter_text_point = candidates[-1]
                self._filter_text_point = max(
                    0, min(self._filter_text_point, len(self._filter_text)))
                self._update_filter_text()
        elif k == 'M-f':
            wordstarts = ranker.Ranker().get_starts(self._filter_text)
            wordstarts.append(len(self._filter_text))
            candidates = []
            for start in wordstarts:
                if start > self._filter_text_point:
                    candidates.append(start)
            if candidates:
                self._filter_text_point = candidates[0]
                self._filter_text_point = max(
                    0, min(self._filter_text_point, len(self._filter_text)))
                self._update_filter_text()
        elif k == '^K':
            before = self._filter_text[0:self._filter_text_point]
            self._filter_text = before
            self._update_filter_text()
        elif k == '^R':
            self.on_reindex_clicked()
        else:
            if not (k.startswith('^') or k.startswith('KEY_')
                    or k.startswith('M-')):
                before = self._filter_text[0:self._filter_text_point]
                after = self._filter_text[self._filter_text_point:]
                self._filter_text = "%s%s%s" % (before, k, after)
                self._filter_text_point += 1
                self._update_filter_text()
                self.set_filter_text(self._filter_text)
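
`Ranker.get_starts` is consumed here only as a list of word-start offsets driving the M-b/M-f word-motion commands. Its real tokenisation rules are not shown; the stand-in below simply treats maximal \w+ runs as words:

import re

def get_starts_sketch(text):
    """Stand-in for Ranker.get_starts: offsets where each word begins."""
    return [m.start() for m in re.finditer(r'\w+', text)]

# M-b picks the last start before the point, M-f the first one after it:
# get_starts_sketch('foo bar_baz')  ->  [0, 4]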
Example no. 5
        'tfidf': _tfidf,
        'inverted': _inverted_index,
        'did_name': _doc_id_name_index
    }


print('Loading...')
index = load_index()
print('Ready...(type exit to terminate)')

while True:
    query = input('What is the query? ')
    # query = 'patient ARDS'

    if query == 'exit':
        break

    print('...')
    normalized_query: list[str] = preprocessing.query(query)
    tf_idf_ranker_q = ranker.Ranker(index['inverted'], index['did_name'])
    _tfidf_query = tf_idf_ranker_q.tfidf_query(normalized_query)

    document_results: list[tuple[int, float]] = ranker.top_10_relevant_documents(
        index['tfidf'], _tfidf_query)
    document_results = [{
        'document_name': index['did_name'][d_id[0]],
        'similarity_score': d_id[1]
    } for d_id in document_results]
    print('Matching documents for the query - ', query)
    util.print_result(document_results, util.get_corpus_dir_path_from_args())
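
`top_10_relevant_documents` itself is not shown; given the TF-IDF vectors built above, cosine similarity is the usual scoring rule. The sketch below assumes plain term -> weight dicts where the example keeps a pandas DataFrame:

import math

def top_k_by_cosine(doc_vectors: dict[int, dict[str, float]],
                    query_vector: dict[str, float],
                    k: int = 10) -> list[tuple[int, float]]:
    """Rank documents by cosine similarity to the query vector."""
    q_norm = math.sqrt(sum(w * w for w in query_vector.values()))
    scored = []
    for doc_id, vec in doc_vectors.items():
        dot = sum(w * query_vector.get(t, 0.0) for t, w in vec.items())
        d_norm = math.sqrt(sum(w * w for w in vec.values()))
        if d_norm and q_norm:
            scored.append((doc_id, dot / (d_norm * q_norm)))
    return sorted(scored, key=lambda pair: pair[1], reverse=True)[:k]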