Esempio n. 1
0
    def _getActualExpansions(self, articleID, article):
        """Collect acronym expansions for an article via ``Expander_fromText``.

        Acronyms are first extracted from ``article`` with
        ``AcronymExtractor_v1.get_acronyms``; that result is then passed,
        together with the article, to
        ``Expander_fromText.try_to_expand_acronyms``.

        Args:
            articleID: Not used by this method.
            article: Text handed to both the extractor and the expander.

        Returns:
            A new ``dict`` built from the items of the expander's result.
        """
        candidates = AcronymExtractor_v1().get_acronyms(article)

        # The second element of the returned pair is not needed here.
        expansions, _ = Expander_fromText().try_to_expand_acronyms(
            article, candidates)

        # NOTE(review): if ``expansions`` is a plain dict, every item is a
        # (key, value) pair of length 2, so this length check always passes.
        # It may have been meant to test the value instead -- confirm.
        return dict(pair for pair in expansions.items() if len(pair) > 0)
Esempio n. 2
0
    def _removeInTextExpansions(self, article, acronyms):
        """Blank out in-text acronym definitions and return the cleaned text.

        For every acronym, each pattern yielded by
        ``Expander_fromText.definition_patterns`` is run over the text with
        ``findall``; every returned match is then replaced by a single space
        using ``str.replace``. The cleaned text is checked with
        ``__verifyRemovalOfInTextExpansions`` before being returned.

        Args:
            article: Text to clean.
            acronyms: Iterable of acronyms whose definitions are removed.

        Returns:
            The article text after all replacements.

        Raises:
            RuntimeError: Propagated from the verification step.
        """
        cleaned = article
        text_expander = Expander_fromText()

        for acronym in acronyms:
            for regex in text_expander.definition_patterns(acronym):
                # Matches are collected once per pattern, before any of them
                # is replaced in the text.
                for match in regex.findall(cleaned):
                    cleaned = cleaned.replace(match, " ")

        self.__verifyRemovalOfInTextExpansions(cleaned, acronyms)

        return cleaned
Esempio n. 3
0
    def __verifyRemovalOfInTextExpansions(self, article_for_testing, acronyms):
        """Check that definitions are gone but the acronyms themselves remain.

        For each acronym, two conditions are enforced on
        ``article_for_testing``:

        * none of the patterns from ``Expander_fromText.definition_patterns``
          still matches the text;
        * the acronym is still contained in the text.

        Args:
            article_for_testing: Text to check.
            acronyms: Iterable of acronyms to check against the text.

        Raises:
            RuntimeError: On the first violated condition; the same message
                is logged through ``common_logger.error`` first.
        """
        text_expander = Expander_fromText()

        for acronym in acronyms:
            for regex in text_expander.definition_patterns(acronym):
                if not regex.findall(article_for_testing):
                    continue
                # A definition pattern still matches the text.
                message = (
                    "Verification of text expansion removal failed:\nAcronym: "
                    + acronym + "\nText:\n" + article_for_testing
                )
                common_logger.error(message)
                raise RuntimeError(message)

            if acronym in article_for_testing:
                continue
            # The acronym itself is no longer present in the text.
            message = (
                "Acronym removed from text while cleaning:\nAcronym: "
                + acronym + "\nText:\n" + article_for_testing
            )
            common_logger.error(message)
            raise RuntimeError(message)