Example #1
0
import best
# Build a BEST query for "breast cancer". The second positional argument
# ("gene") is presumably the entity-type filter -- TODO confirm against the
# BESTQuery signature. noAbsTxt=False is passed through unchanged.
entity_query = best.BESTQuery("breast cancer", "gene", noAbsTxt=False)

# Run the query and show only the first returned entry.
entities = best.getRelevantBioEntities(entity_query)
first_entity = entities[0]
print(first_entity)

Example #2
0
    def best_search(self, query, kcw_path=None):
        """Query BEST for biomedical entities relevant to a free-text question.

        The question is reduced to the tokens that appear in a fixed keyword
        whitelist, sent to BEST together with an entity-type filter inferred
        from the question text, and every returned entity is converted into a
        result record.

        Args:
            query: Free-text question string.
            kcw_path: Not used by this method; kept so that existing callers
                which pass it keep working.

        Returns:
            A dict ``{'ret': [...], 'time': <elapsed milliseconds as int>}``
            (milliseconds assuming ``time`` is ``time.time``). Each item of
            ``ret`` is a dict with the keys ``context`` (first abstract of the
            entity), ``title``, ``doc_idx``, ``start_idx``, ``end_idx``,
            ``score``, ``metadata`` and ``answer`` (the entity name). ``ret``
            is empty when BEST yields no result.
        """
        t0 = time()

        # Type filter: the first known entity type that occurs in the question
        # (plain, case-sensitive substring match) narrows the search;
        # otherwise every entity type is searched.
        ent_types = (
            "gene", "drug", "chemical compound", "target", "disease",
            "toxin", "transcription factor", "mirna", "pathway", "mutation",
        )
        query_type = next(
            (ent_type for ent_type in ent_types if ent_type in query),
            "All Entity Type",
        )

        # Stop words for BEST queries. The NLTK corpus is downloaded only when
        # the lookup actually fails: the mere existence of an nltk_data
        # directory says nothing about which resources it contains.
        try:
            english_stop_words = stopwords.words('english')
        except LookupError:
            nltk.download('stopwords')
            english_stop_words = stopwords.words('english')
        stop_words = set(english_stop_words)
        stop_words.update(['?', 'Why', 'What', 'How', 'Where', 'When', 'Who'])

        # Whitelist of tokens that are allowed to reach BEST (a set, because
        # it is only used for membership tests).
        # NOTE(review): 'airbone' looks like a typo for 'airborne' -- confirm
        # before changing, since it alters which queries match.
        entity_set = {
            'COVID-19', 'SARS-CoV-2', 'hypertension', 'diabetes', 'heart', 'disease', 'obese', 'death',
            'HCoV-19', 'HCoV', 'coronavirus', 'symptoms', 'incubation', 'periods', 'period', 'quarantine',
            'asymptomatic', 'transmissions', 'fecal', 'excretion', 'decline', 'Wuhan', 'mortality',
            'patients', 'stay', 'reproduction', 'risk', 'factor', 'factors', 'pregnancy', 'interval', 'absent',
            'reported', 'length', 'diagnosed', 'United', 'States', 'isolated', 'CDC', 'WHO', 'vaccine',
            'negative', 'animals', 'airbone', 'spread', 'blood', 'sanitizer', 'controlled', 'illness', 'friends',
        }

        # Same lazy-download strategy for the tokenizer model.
        try:
            query_tokens = word_tokenize(query)
        except LookupError:
            nltk.download('punkt')
            query_tokens = word_tokenize(query)

        # Keep whitelisted, non-stop-word tokens in their original order,
        # separated by single spaces (no trailing separator).
        new_query = ' '.join(
            token for token in query_tokens
            if token not in stop_words and token in entity_set
        )

        # Get BEST result
        q = best.BESTQuery(new_query, noAbsTxt=False, filterObjectName=query_type)
        r = best.getRelevantBioEntities(q)

        # No result: either nothing usable came back (None or an empty list --
        # NOTE(review): confirm whether the client can return None for a
        # rejected query), or BEST answered with its lone placeholder entry
        # whose only key is 'rank' with value 0.
        if not r or (len(r) == 1 and r[0]['rank'] == 0 and len(r[0]) == 1):
            return {'ret': [], 'time': int(1000 * (time() - t0))}

        outs = []
        for entity in r:
            context = entity['abstracts'][0]
            score = entity['score']
            answer = entity['entityName']
            pmid = entity['PMIDs'][0]

            # Fall back to the bare PubMed id when no metadata is available.
            # A new dict is created so the object handed out by get_doc_meta
            # is never modified.
            metadata = self.get_doc_meta(pmid)
            if not metadata:
                metadata = {'pubmed_id': int(pmid)}

            # Deep-copied so the returned records share no mutable state with
            # the BEST response or with get_doc_meta's return value.
            outs.append(copy.deepcopy({
                'context': context,
                'title': '',
                'doc_idx': None,
                'start_idx': 0,
                'end_idx': 0,
                'score': score,
                'metadata': metadata,
                'answer': answer,
            }))

        return {'ret': outs, 'time': int(1000 * (time() - t0))}
Example #3
0
File: test.py Project: hurben/CNP
import best

# Query parameters handed to BESTQuery as a single dict.
query_params = {
    "keywordA": ["cancer"],
    "keywordB": ["BRCA1", "EGFR"],
    "filterObjectName": "",
    "topN": 30,
}
best_query = best.BESTQuery(query_params)

# Run the query and print the full result as returned by the client.
relevant_entities = best.getRelevantBioEntities(best_query)
print(relevant_entities)