コード例 #1
0
    def _get_single_entity_mentions(self, entity: str, pageSize: int = None):
        """
        Lazily iterate over the articles that mention ``entity``.

        Pages are requested one at a time from
        ``PMC_Europe_Service.MENTION_URL``; the ``nextCursorMark`` value of
        each response is used for the following request, and iteration stops
        once the cursor no longer changes.
        See https://europepmc.org/AnnotationsApi!/annotations45api45controller/getAnnotationsArticlesByEntityUsingGET

        The articles come up sorted by number of mentions.

        :param entity: entity to look up
        :param pageSize: number of articles requested per page; when ``None``
            ``PMC_Europe_Service.MAX_PAGE_SIZE`` is used
        :return: generator of ``(article, article['extId'])`` tuples
        :raises AssertionError: if a response is not ok
        """
        page_size = (PMC_Europe_Service.MAX_PAGE_SIZE
                     if pageSize is None else pageSize)

        seen = 0
        # Distinct start values so that the first page is always requested.
        previous_mark, current_mark = -1, 0
        while current_mark != previous_mark:
            url = PMC_Europe_Service.MENTION_URL.format(
                entity, 1, 'ID_LIST', current_mark, page_size)
            logger.info(
                f'{datetime.datetime.now()} Getting {seen} to {seen+page_size}'
            )
            response = requests.get(url)
            assert response.ok
            logger.info(f'{datetime.datetime.now()} Ok')
            payload = json.loads(response.content.decode().strip())
            # Advance the cursor; an unchanged cursor ends the loop.
            previous_mark, current_mark = current_mark, payload['nextCursorMark']
            # ``seen`` keeps the running number of articles handed out so far.
            for seen, article in enumerate(payload['articles'], start=seen + 1):
                yield article, article['extId']
コード例 #2
0
 def get_co_occurrences(self,
                        entity: str,
                        limit: int = 20,
                        types: List[str] = None) -> List[CoOccurrence]:
     """Returns a list of co-occurrences from a given entity

     This operation is not supported by this service: the call is logged
     and ``TextMiningServiceOperationNotSupported`` is always raised.

     :param entity: entity to look up (unused)
     :param limit: maximum number of results (unused)
     :param types: entity types to restrict to (unused)
     :raises TextMiningServiceOperationNotSupported: always
     """
     # The docstring must be the first statement of the body; placed after
     # the log call it was a plain string expression and __doc__ stayed None.
     logger.info('get co occurrences')
     raise TextMiningServiceOperationNotSupported
コード例 #3
0
    def _get_mentions_for_multiple_entities(self,
                                            entities: List[str],
                                            limit: int = 20
                                            ) -> List[Publication]:
        """
        Method for multiple entities retrieval. It's slow but a bit faster
        than _get_mentions_for_single_entity if there is a limit.

        Articles annotated with the first entity are fetched page by page
        from ``PMC_Europe_Service.MENTION_URL``. An article is kept when every
        remaining entity (compared lower-cased) occurs at least once in the
        ``exact`` text of its annotations. Paging stops when the cursor no
        longer changes or once ``limit`` publications were collected.

        :param entities: entities that must all be mentioned; the first one
            drives the query
        :param limit: maximum number of publications returned
        :return: numpy array of ``Publication`` objects ordered by the total
            number of matching annotations (highest first), at most ``limit``
            entries
        :raises IndexError: if ``entities`` is empty
        :raises AssertionError: if a response is not ok
        """
        entities = list(map(str.lower, entities))
        first_entity = entities[0]
        rest_of_entities = entities[1:]

        if rest_of_entities:
            # Extra entities filter articles out, so ask for full pages.
            pageSize = PMC_Europe_Service.MAX_PAGE_SIZE
        else:
            pageSize = min(PMC_Europe_Service.MAX_PAGE_SIZE, limit)

        prevCursorMark = -1
        cursorMark = 0
        total_counter = 0
        publications = []
        scores = []
        while cursorMark != prevCursorMark and len(publications) < limit:
            url = PMC_Europe_Service.MENTION_URL.format(
                first_entity, 0, 'JSON', cursorMark, pageSize)
            logger.info(
                f'{datetime.datetime.now()} Getting {total_counter} to {total_counter+pageSize}'
            )
            results = requests.get(url)
            # NOTE(review): assert is stripped under ``python -O``; kept so
            # the exception type seen by callers does not change.
            assert results.ok
            logger.info(f'{datetime.datetime.now()} Ok')
            data = json.loads(results.content.decode().strip())
            prevCursorMark = cursorMark
            cursorMark = data['nextCursorMark']
            for article in data['articles']:
                # Number of annotations found for each of the other entities.
                mention_counts = dict.fromkeys(rest_of_entities, 0)
                total_counter += 1
                for annotation in article['annotations']:
                    other_entity = annotation['exact'].lower()
                    # check if this entity is what we look for
                    if other_entity in mention_counts:
                        mention_counts[other_entity] += 1
                # keep the article only if it includes all entities
                if all(mention_counts.values()):
                    # Same id fields that get_mentions uses; the previous
                    # code referenced an undefined name ``article_id``.
                    pub = Publication(pm_id=article.get('extId'),
                                      pmc_id=article.get('pmcid'))
                    publications.append(pub)
                    scores.append(sum(mention_counts.values()))

        publications = np.array(publications)
        scores = np.array(scores)
        inds = scores.argsort()[::-1]
        # A page may push the collection past ``limit``; trim after sorting.
        return publications[inds][:limit]
コード例 #4
0
 def get_mentions(self,
                  entities: List[str],
                  limit: int = 20) -> List[Publication]:
     """Return up to ``limit`` publications that mention the given entity.

     Every entity is first passed through ``_convert_entity``. Only a
     single entity is supported; any other number of entities raises
     ``TextMiningServiceOperationNotSupported``.

     :param entities: list holding exactly one entity
     :param limit: maximum number of publications returned
     :return: list of ``Publication`` objects built from the ``extId`` and
         ``pmcid`` fields of each article
     :raises TextMiningServiceOperationNotSupported: if ``entities`` does
         not hold exactly one element
     """
     logger.info('get mentions')
     labels = [self._convert_entity(item) for item in entities]
     if len(labels) != 1:
         raise TextMiningServiceOperationNotSupported

     # The loop below pulls one article beyond ``limit`` before it stops;
     # presumably the extra slot keeps that article on the same page.
     page_size = min(limit + 1, PMC_Europe_Service.MAX_PAGE_SIZE)
     mentions = self._get_single_entity_mentions(labels[0],
                                                 pageSize=page_size)
     found = []
     for article, _ in mentions:
         if len(found) == limit:
             break
         found.append(
             Publication(pm_id=article.get('extId'),
                         pmc_id=article.get('pmcid')))
     return found
コード例 #5
0
 def _convert_entity(self, entity: str) -> str:
     """Translate an ontology identifier into its label, best effort.

     ``entity`` is expected to look like ``ONTOLOGY:NUMBER``. The EBI OLS
     terms endpoint is queried for the matching short form and the label
     of the term whose ``obo_id`` equals ``entity`` is returned.

     The input is returned unchanged when it contains no ``:``, when the
     request is not ok, or when no matching term is found.

     :param entity: identifier to convert
     :return: the label found for ``entity``, otherwise ``entity`` itself
     """
     EBI_URL = 'https://www.ebi.ac.uk/ols/api/ontologies/{}/terms?short_form={}_{}'
     try:
         ontology, number = entity.split(':', 1)
     except ValueError:
         logger.info(f'(_convert_entity) Error spliting entity {entity}')
         return entity
     url = EBI_URL.format(ontology, ontology, number)
     response = requests.get(url)
     # Explicit check instead of ``assert``: asserts are removed under
     # ``python -O``, which would let error responses reach the parser.
     if not response.ok:
         return entity
     data = json.loads(response.content.decode().strip())
     # A response without the ``_embedded.terms`` list is treated as
     # "no match", in line with the other fall-backs of this method.
     terms = data.get('_embedded', {}).get('terms', [])
     for term in terms:
         if term.get('obo_id') == entity:
             label = term['label']
             logger.info(
                 f'(_convert_entity) Found label {label} for entity {entity}'
             )
             return label
     return entity
コード例 #6
0
        scores = np.array(scores)
        inds = scores.argsort()[::-1]
        return publications[inds][:limit]

    def get_co_occurrences(self,
                           entity: str,
                           limit: int = 20,
                           types: List[str] = None) -> List[CoOccurrence]:
        """Returns a list of co-occurrences from a given entity

        This operation is not supported by this service: the call is logged
        and ``TextMiningServiceOperationNotSupported`` is always raised.

        :param entity: entity to look up (unused)
        :param limit: maximum number of results (unused)
        :param types: entity types to restrict to (unused)
        :raises TextMiningServiceOperationNotSupported: always
        """
        # The docstring must be the first statement of the body; placed after
        # the log call it was a plain string expression and __doc__ stayed None.
        logger.info('get co occurrences')
        raise TextMiningServiceOperationNotSupported


if __name__ == "__main__":
    # Manual smoke test: performs real requests when run as a script.
    logger.info('PMC')
    service = PMC_Europe_Service()

    # Resolve one identifier to its label and show the result.
    logger.info('Test convert entity')
    converted = service._convert_entity('DOID:2841')
    print(converted)

    # Print the publications found for a single entity (default limit).
    logger.info('get mentions for single entity PRDM1')
    for publication in service.get_mentions(['DOID:7148']):
        print(publication)

    print(datetime.datetime.now())
    # right now it raises TextMiningServiceOperationNotSupported
    # print('get mentions for multiple entities PRDM1, GFP')
    # for publication in service.get_mentions(['PRDM1', 'GFP']):
    #     print(publication)
    # print(datetime.datetime.now())