def __verifyRemovalOfInTextExpansions(self, article_for_testing, acronyms):
        """Check that a cleaned article kept its acronyms but not their in-text definitions.

        For every acronym, each pattern returned by
        ``Expander_fromText().definition_patterns(acronym)`` is run against
        the article text; any match is treated as a failure. Afterwards the
        acronym itself must still occur in the text.

        Args:
            article_for_testing: the article text to verify.
            acronyms: iterable of acronym strings expected in the text.

        Raises:
            RuntimeError: if a definition pattern still matches the text, or
                if an acronym is no longer contained in the text. The same
                message is logged through ``common_logger`` before raising.
        """
        text_expander = Expander_fromText()

        for acronym in acronyms:
            for definition_pattern in text_expander.definition_patterns(acronym):
                matches = definition_pattern.findall(article_for_testing)
                if len(matches) == 0:
                    continue
                # A definition pattern still matches: the expansion was not removed.
                failure_text = (
                    "Verification of text expansion removal failed:\nAcronym: "
                    + acronym + "\nText:\n" + article_for_testing)
                common_logger.error(failure_text)
                raise RuntimeError(failure_text)

            if acronym in article_for_testing:
                continue
            # The acronym itself must survive the cleaning step.
            failure_text = (
                "Acronym removed from text while cleaning:\nAcronym: "
                + acronym + "\nText:\n" + article_for_testing)
            common_logger.error(failure_text)
            raise RuntimeError(failure_text)
Exemple #2
0
def _createArticleAndAcronymDB():
    """Build the acronym and article databases from the ARFF files in ``folder_msh_arff``.

    Every file in the folder is read with ``arff.Reader``. The file's
    relation string, split on ``"_"``, gives the list of CUIDs; each row's
    ``class`` value is mapped to an index into that list via
    ``_classToIndex``. The acronym is derived from the file name and stored
    upper-cased.

    For each CUID without a known expansion yet, the expander is run on the
    row's markup-free text; the first expansion found is remembered unless
    it equals the acronym itself.

    Returns:
        tuple: ``(acronymDB, articleDB)`` where

        * ``acronymDB`` maps an upper-case acronym to a list of
          ``[expansion, pmid, 0]`` entries. When no expansion was found for
          a CUID, the CUID itself is stored in place of the expansion and an
          error is logged.
          NOTE(review): the meaning of the trailing ``0`` is not visible
          here - confirm against the consumers of acronymDB.
        * ``articleDB`` maps a PMID to the citation text with markup
          removed (first occurrence of a PMID wins).
    """
    acronymExpander = Expander_fromText_v2()
    articleDB = {}
    acronymDB = {}
    CUID_to_expansion = {}
    for fileName in os.listdir(folder_msh_arff):
        filePath = os.path.join(folder_msh_arff, fileName)
        # Use a context manager so the file handle is closed after reading
        # instead of being leaked once per ARFF file.
        with open(filePath, "rb") as arff_file:
            file_reader = arff.Reader(arff_file)
            # the iterator needs to be called for the self.relation part to be
            # initialized
            lines = list(file_reader)
            cuids = file_reader.relation.strip().split("_")
        # storing all acronyms as uppercase values
        acronym = _fileNameToAcronym(fileName).upper()
        cuid_and_pmid = []
        for line in lines:
            pmid = unicode(line.PMID)
            text = TextTools.toUnicode(line.citation)
            cuid = cuids[_classToIndex(line["class"])]
            textWithoutMarkup = _removeMarkup(text)
            if (cuid not in CUID_to_expansion):
                acronymExpansions = []
                acronymExpansions = acronymExpander.expand(
                    acronym, acronymExpansions, textWithoutMarkup)
                # Ignore "expansions" that merely repeat the acronym.
                if (len(acronymExpansions) != 0 and
                        acronymExpansions[0].expansion != acronym):
                    CUID_to_expansion[cuid] = acronymExpansions[0].expansion
            if (pmid not in articleDB):
                articleDB[pmid] = textWithoutMarkup
            cuid_and_pmid.append([cuid, pmid])

        if (acronym in acronymDB):
            # Entries for a repeated acronym are appended to the existing list.
            common_logger.error("acronym already present in acronymDB")
        else:
            acronymDB[acronym] = []
        for cuid, pmid in cuid_and_pmid:
            if (cuid in CUID_to_expansion):
                acronymDB[acronym].append([CUID_to_expansion[cuid], pmid, 0])
            else:
                # Fall back to the raw CUID when no expansion was discovered.
                common_logger.error(
                    "Expansion not found for CUID %s of %s" % (cuid, acronym))
                acronymDB[acronym].append([cuid, pmid, 0])

    return acronymDB, articleDB
Exemple #3
0
def internal_server_error(e):
    """Log the given error and return the rendered error page with status 500.

    Args:
        e: the error object to log via ``common_logger``.

    Returns:
        tuple: ``(rendered error page, 500)``.
    """
    common_logger.error(e)

    error_page = render_template(string_constants.file_errorpage)
    status_code = 500
    return error_page, status_code