import best

# Minimal usage example: build a query for the keyword "breast cancer".
# NOTE(review): the second positional argument ("gene") is presumably the
# entity-type filter -- confirm against the BESTQuery signature.
q = best.BESTQuery("breast cancer", "gene", noAbsTxt=False)

# Send the query and show only the first element of whatever comes back.
r = best.getRelevantBioEntities(q)
first_entity = r[0]
print(first_entity)
def best_search(self, query, kcw_path=None):
    """Run a BEST entity search for *query* and return normalised hits.

    The query is tokenised, reduced to the tokens that appear in a fixed
    whitelist of terms, and sent to BEST together with an entity-type
    filter derived from the raw query text.

    Args:
        query: Free-text question or keyword string.
        kcw_path: Unused by this method; kept so existing callers that pass
            it keep working.

    Returns:
        A dict with two keys:
            'ret':  list of result dicts (keys: 'context', 'title',
                    'doc_idx', 'start_idx', 'end_idx', 'score', 'metadata',
                    'answer'); empty when BEST reports no result.
            'time': elapsed wall-clock time in milliseconds (int).
    """
    t0 = time()

    # Type filter: the first entity type that occurs in the query wins.
    # NOTE(review): this is a plain, case-sensitive substring test, so e.g.
    # "gene" also matches inside "genetic" -- confirm that is intended.
    ent_types = (
        "gene", "drug", "chemical compound", "target", "disease",
        "toxin", "transcription factor", "mirna", "pathway", "mutation",
    )
    query_type = next(
        (ent_type for ent_type in ent_types if ent_type in query),
        "All Entity Type",
    )

    # Stopwords and tokenisation for BEST queries.
    # NLTK raises LookupError when a required resource is missing, so fetch
    # the data only in that case. Checking merely for the existence of
    # ~/nltk_data (as was done before) breaks when the directory exists but
    # lacks these resources, and re-downloads needlessly when the data
    # lives in another NLTK search path.
    try:
        english_stop_words = stopwords.words('english')
        query_tokens = word_tokenize(query)
    except LookupError:
        nltk.download('punkt')
        nltk.download('stopwords')
        english_stop_words = stopwords.words('english')
        query_tokens = word_tokenize(query)

    stop_words = set(
        english_stop_words
        + ['?']
        + ['Why', 'What', 'How', 'Where', 'When', 'Who']
    )

    # Whitelist of terms allowed through to BEST (exact, case-sensitive).
    entity_set = frozenset([
        'COVID-19', 'SARS-CoV-2', 'hypertension', 'diabetes', 'heart',
        'disease', 'obese', 'death', 'HCoV-19', 'HCoV', 'coronavirus',
        'symptoms', 'incubation', 'periods', 'period', 'quarantine',
        'asymptomatic', 'transmissions', 'fecal', 'excretion', 'decline',
        'Wuhan', 'mortality', 'patients', 'stay', 'reproduction', 'risk',
        'factor', 'factors', 'pregnancy', 'interval', 'absent', 'reported',
        'length', 'diagnosed', 'United', 'States', 'isolated', 'CDC',
        'WHO', 'vaccine', 'negative', 'animals', 'airbone', 'spread',
        'blood', 'sanitizer', 'controlled', 'illness', 'friends',
    ])

    kept_tokens = [
        token for token in query_tokens
        if token not in stop_words and token in entity_set
    ]
    # Each kept token is followed by a single space (including the last
    # one) so the string sent to BEST is exactly what it has always been.
    new_query = ''.join(token + ' ' for token in kept_tokens)

    # Get BEST result
    q = best.BESTQuery(new_query, noAbsTxt=False,
                       filterObjectName=query_type)
    r = best.getRelevantBioEntities(q)

    # No result: BEST answers with a single entry carrying only rank 0.
    if len(r) == 1 and r[0]['rank'] == 0 and len(r[0].keys()) == 1:
        t1 = time()
        return {'ret': [], 'time': int(1000 * (t1 - t0))}

    outs = []
    for hit in r:
        first_pmid = hit['PMIDs'][0]

        # Work on a copy so the fallback below never writes into an object
        # that get_doc_meta may still own or share.
        metadata = copy.deepcopy(self.get_doc_meta(first_pmid))
        if len(metadata) == 0:
            metadata['pubmed_id'] = int(first_pmid)

        # A fresh dict per hit; only the first abstract and first PMID of
        # each entity are used.
        outs.append({
            'context': hit['abstracts'][0],
            'title': '',
            'doc_idx': None,
            'start_idx': 0,
            'end_idx': 0,
            'score': hit['score'],
            'metadata': metadata,
            'answer': hit['entityName'],
        })

    t1 = time()
    return {'ret': outs, 'time': int(1000 * (t1 - t0))}
import best

# Dictionary-style query: two keyword lists, an empty entity-type filter and
# a topN value of 30, passed to BESTQuery as a single dict.
query_params = {
    "keywordA": ["cancer"],
    "keywordB": ["BRCA1", "EGFR"],
    "filterObjectName": "",
    "topN": 30,
}
bestQuery = best.BESTQuery(query_params)

# Run the search and print the full result as returned by the client.
relevantEntities = best.getRelevantBioEntities(bestQuery)
print(relevantEntities)