def __init__(self, code_spaces_path: str) -> None:
        """Creates the Wikipedia client and loads codespaces for ICD-10 category articles.

        Stores ``WikipediaClient("en")`` on ``self.wikipedia_client`` and then
        hands ``code_spaces_path`` to ``self._load_code_spaces_json``.

        :param code_spaces_path: path to file with wikipedia article codespaces
        :type code_spaces_path: str
        """
        self.wikipedia_client = WikipediaClient("en")
        self._load_code_spaces_json(code_spaces_path)
    def test_get_languages(self):
        # Asks the English client for the language links of "ICD-10" and
        # expects exactly ten entries on the last page of the response.
        client = WikipediaClient("en")
        response = client.get_languages("ICD-10")
        langlinks: list = []
        for page_entry in response["query"]["pages"].values():
            # Each iteration overwrites the previous value, so only the last
            # page's links are checked below.
            langlinks = page_entry["langlinks"]

        self.assertEqual(10, len(langlinks))
Example #3
0
    def test_find_icd_section_title(self):
        # Resolves the first search hit for "ICD-10", downloads its HTML and
        # checks which section title the parser reports for code E10.3.
        client: WikipediaClient = WikipediaClient("en")
        search_response: dict = client.search_title("ICD-10")
        first_hit_title: str = search_response["query"]["search"][0]["title"]
        page_html: str = str(wikipedia.page(first_hit_title).html())
        section_title: str = HtmlParser.find_icd_section_title(page_html, "E10.3")

        expected_title = "ICD-10 Chapter IV: Endocrine, nutritional and metabolic diseases"
        self.assertEqual(expected_title, section_title)
Example #4
0
    def test_find_disease_name_and_link(self):
        # Searches for the chapter IV article, downloads the first hit's HTML
        # and checks the link/title pair the parser extracts for code E10.3.
        client: WikipediaClient = WikipediaClient("en")
        search_response: dict = client.search_title(
            "ICD-10 Chapter IV: Endocrine, nutritional and metabolic diseases")
        first_hit_title: str = search_response["query"]["search"][0]["title"]
        page_html: str = str(wikipedia.page(first_hit_title).html())
        link, title = HtmlParser.find_disease_name_and_link(page_html, "E10.3")

        self.assertEqual("/wiki/Diabetic_retinopathy", link)
        self.assertEqual("Diabetic retinopathy", title)
class IcdWikipediaMapper:
    """Class used for mapping ICD-10 code with Wikipedia articles.

    The mapping works in three steps: the ICD-10 code is matched against the
    code spaces loaded from a JSON file to obtain a chapter identifier, the
    chapter article is located through a Wikipedia title search, and the
    chapter article HTML is handed to ``HtmlParser`` to extract the disease
    article link and title.
    """

    # Quoted so the annotation is not evaluated while the class body executes.
    wikipedia_client: "WikipediaClient"
    icd_chapter_map: dict
    # Defined on the class, so the same cache object is shared by every
    # instance. Maps chapter article title -> page HTML, oldest insert first.
    wikipedia_pages_cache: OrderedDict = OrderedDict()

    # Number of pages kept in the cache before the oldest one is dropped.
    _MAX_CACHED_PAGES: int = 4
    # Sentinel string returned by the lookup helpers when nothing matches.
    _NOT_FOUND: str = "not found"
    # Compiled once instead of on every loop iteration.
    _CODE_SPACE_PATTERN = re.compile(r"([A-Z])([0-9]+)-([A-Z])([0-9]+)",
                                     re.IGNORECASE)
    _ICD10_CODE_PATTERN = re.compile(r"([A-Z])([0-9]+)(\.[0-9]+)*",
                                     re.IGNORECASE)

    def __init__(self, code_spaces_path: str):
        """Loads codespaces for ICD-10 category articles.

        :param code_spaces_path: path to file with wikipedia article codespaces
        :type code_spaces_path: str
        """
        self.wikipedia_client = WikipediaClient("en")
        self._load_code_spaces_json(code_spaces_path)

    def __del__(self):
        # Intentionally a no-op; kept so the class interface is unchanged.
        pass

    def _load_code_spaces_json(self, code_spaces_path: str) -> None:
        """Reads the code spaces JSON file into ``self.icd_chapter_map``.

        :param code_spaces_path: path to file with wikipedia article codespaces
        :type code_spaces_path: str
        """
        # The context manager closes the file even if reading or parsing fails.
        with open(code_spaces_path, "r", encoding="utf-8") as code_spaces_file:
            self.icd_chapter_map = json.load(code_spaces_file)

    def _get_icd_10_article_chapter(self, icd10_code: str) -> str:
        """Finds the chapter whose code space contains the given ICD-10 code.

        A code space such as ``A00-B99`` is treated as the inclusive range
        between ``(letter, number)`` pairs, so it also covers letters lying
        strictly between its two boundary letters (e.g. ``W19`` in
        ``V01-Y98``). Letters are compared case-insensitively.

        :param icd10_code: ICD-10 code, e.g. ``E10.3``
        :type icd10_code: str
        :return: ``wikipediaChapter`` of the first matching entry, or
            ``"not found"`` when the code is malformed or no entry matches
        :rtype: str
        """
        code_match = self._ICD10_CODE_PATTERN.search(icd10_code)
        if code_match is None:
            # Not an ICD-10 code at all; nothing can match it.
            return self._NOT_FOUND
        code_key = (code_match.group(1).upper(), int(code_match.group(2)))

        for entry in self.icd_chapter_map["codeSpaces"]:
            space_match = self._CODE_SPACE_PATTERN.search(entry["codeSpace"])
            if space_match is None:
                # Skip entries that do not describe a "X00-Y99" style range.
                continue
            lower_bound = (space_match.group(1).upper(),
                           int(space_match.group(2)))
            upper_bound = (space_match.group(3).upper(),
                           int(space_match.group(4)))
            # Tuple comparison orders by letter first, then by number.
            if lower_bound <= code_key <= upper_bound:
                return entry["wikipediaChapter"]
        return self._NOT_FOUND

    def _get_icd_chapter_article_title(self, icd10_code: str) -> str:
        """Searches Wikipedia for the chapter article covering the given code.

        :param icd10_code: ICD-10 code
        :type icd10_code: str
        :return: title of the first search hit, or ``"not found"`` when the
            code belongs to no known chapter or the search has no hits
        :rtype: str
        """
        chapter = self._get_icd_10_article_chapter(icd10_code)
        if chapter == self._NOT_FOUND:
            return self._NOT_FOUND
        search_text: str = "Chapter {} of ICD-10 deals with".format(chapter)
        search_response: dict = self.wikipedia_client.search_title(search_text)
        search_hits = search_response["query"]["search"]
        if not search_hits:
            # An empty result list used to raise IndexError here.
            return self._NOT_FOUND
        return search_hits[0]["title"]

    def _get_icd_chapter_article_page(self, title: str) -> str:
        """Returns the HTML of the given article, using the shared page cache.

        :param title: Wikipedia article title
        :type title: str
        :return: article HTML
        :rtype: str
        """
        cache = self.wikipedia_pages_cache
        if title in cache:
            return cache[title]

        page_html: str = str(wikipedia.page(title).html())
        cache[title] = page_html
        if len(cache) > self._MAX_CACHED_PAGES:
            # Removes oldest page in cache
            cache.popitem(last=False)
        return page_html

    def get_disease_wikipedia_data(self,
                                   icd10_code: str,
                                   languages: list = None) -> list:
        """Searches for article about disease with given ICD-10 code.

        :param icd10_code: ICD-10 code
        :type icd10_code: str
        :param languages: Article languages to be searched for (examples: en, es, ru, pl)
        :type languages: list
        :return: list of ``(language, title, url)`` tuples; the English entry
            comes first, followed by one entry per requested language for
            which an article url was found. Empty when the chapter or the
            disease article cannot be located.
        :rtype: list
        """
        if languages is None:
            languages = []
        icd10_code = icd10_code.upper()

        chapter_title: str = self._get_icd_chapter_article_title(icd10_code)
        if chapter_title == self._NOT_FOUND:
            return []

        chapter_html: str = self._get_icd_chapter_article_page(chapter_title)
        url, title = HtmlParser.find_disease_name_and_link(
            chapter_html, icd10_code)
        if not url:
            return []

        result: list = [('en', title, "https://en.wikipedia.org" + url)]

        search_hits = self.wikipedia_client.search_title(
            title)["query"]["search"]
        if search_hits:
            # Prefer the title reported by the search over the link text.
            title = search_hits[0]["title"]

            for language in languages:
                language_url, language_title = (
                    self.wikipedia_client.get_article_language_info(
                        title.replace("'s", ""), language))
                if language_url != "":
                    result.append((language, language_title, language_url))

        logging.debug("Article title for ICD-10 code '%s' is '%s'",
                      icd10_code, title)
        logging.debug("Articles urls for ICD-10 code '%s' is '%s'",
                      icd10_code, result)
        return result

    def get_diseases_wikipedia_data(self,
                                    icd10_code_list: list,
                                    languages: list = None) -> list:
        """Searches for articles about diseases with given ICD-10 codes.

        :param icd10_code_list: list of ICD-10 codes
        :type icd10_code_list: list
        :param languages: Article languages to be searched for (examples: en, es, ru, pl)
        :type languages: list
        :return: one list of ``(language, title, url)`` tuples per code for
            which an article was found; codes without a result are skipped
        :rtype: list
        """
        if languages is None:
            languages = []
        result: list = []
        for icd10_code in icd10_code_list:
            articles: list = self.get_disease_wikipedia_data(
                icd10_code, languages)
            if articles:
                result.append(articles)
            else:
                logging.info("Code '%s' not found", icd10_code)
        return result
    def test_constructor(self):
        # Building a client for the Polish Wikipedia must yield an object.
        self.assertIsNotNone(WikipediaClient("pl"))
    def test_get_article_language_url(self):
        # The Polish variant of the "ICD-10" article should point at the
        # Polish Wikipedia host.
        client = WikipediaClient("en")
        polish_url, _polish_title = client.get_article_language_info("ICD-10", "pl")

        self.assertTrue("https://pl.wikipedia.org/" in polish_url)
    def test_get_language_url_from_json(self):
        # Feeds a live languages response into the JSON helper and expects the
        # Belarusian ("be") url to point at the Belarusian Wikipedia host.
        response = WikipediaClient("en").get_languages("Grigori Rasputin")
        belarusian_url, _belarusian_title = WikipediaClient._get_language_info_from_json(
            response, "be")

        self.assertTrue("https://be.wikipedia.org/" in belarusian_url)
    def test_search(self):
        # A title search on the Polish client must return a response that
        # carries a non-None "query" -> "search" entry.
        search_response: dict = WikipediaClient("pl").search_title("ICD-10 I40")

        self.assertIsNotNone(search_response)
        self.assertIsNotNone(search_response["query"]["search"])