def __verifyRemovalOfInTextExpansions(self, article_for_testing, acronyms):
    """Check that the text has no in-text definitions left but still has the acronyms.

    For every acronym, each pattern returned by
    ``Expander_fromText().definition_patterns(acronym)`` is run against the
    text; any match is treated as a failure. The acronym itself must still
    occur in the text.

    Args:
        article_for_testing: text to verify.
        acronyms: iterable of acronym strings expected to be in the text.

    Raises:
        RuntimeError: if a definition pattern still matches the text, or if
            an acronym no longer occurs in it. The same message is logged
            through ``common_logger`` before raising.
    """
    def report_failure(headline, acronym):
        # Log before raising so the failure is recorded even when a caller
        # catches the exception.
        message = headline + acronym + "\nText:\n" + article_for_testing
        common_logger.error(message)
        raise RuntimeError(message)

    definition_finder = Expander_fromText()
    for acronym in acronyms:
        for pattern in definition_finder.definition_patterns(acronym):
            if pattern.findall(article_for_testing):
                report_failure(
                    "Verification of text expansion removal failed:\nAcronym: ",
                    acronym)

        if acronym in article_for_testing:
            continue
        report_failure(
            "Acronym removed from text while cleaning:\nAcronym: ", acronym)
def _createArticleAndAcronymDB():
    """Build the acronym and article databases from the files in ``folder_msh_arff``.

    Each file in the folder is read with ``arff.Reader`` and treated as the
    data for one acronym (the upper-cased result of ``_fileNameToAcronym``).
    The CUIDs for a file come from its relation name split on ``"_"``; each
    row selects one of them through ``_classToIndex(row["class"])``.

    The first time a CUID is seen without a known expansion, the expander is
    run on the row's markup-free text. Its first result is stored as that
    CUID's expansion, unless the result is empty or equal to the acronym
    itself. The CUID -> expansion mapping is shared across all files.

    Returns:
        tuple: ``(acronymDB, articleDB)`` where

        * ``acronymDB`` maps acronym -> list of ``[expansion, pmid, 0]``
          entries; when no expansion was found for a CUID, the CUID itself
          is stored in place of the expansion (and an error is logged).
        * ``articleDB`` maps pmid -> citation text after ``_removeMarkup``
          (the first text seen for a given pmid is kept).
    """
    acronymExpander = Expander_fromText_v2()
    articleDB = {}
    acronymDB = {}
    CUID_to_expansion = {}

    for fileName in os.listdir(folder_msh_arff):
        filePath = os.path.join(folder_msh_arff, fileName)

        # Read everything that needs the file inside the ``with`` block so the
        # handle is closed as soon as the rows are loaded, instead of leaking
        # one open file per ARFF file.
        with open(filePath, "rb") as arff_file:
            file_reader = arff.Reader(arff_file)
            # the iterator needs to be called for the self.relation part to be
            # initialized
            lines = list(file_reader)
            cuids = file_reader.relation.strip().split("_")

        # storing all acronyms as uppercase values
        acronym = _fileNameToAcronym(fileName).upper()

        cuid_and_pmid = []
        for line in lines:
            pmid = unicode(line.PMID)
            text = TextTools.toUnicode(line.citation)
            cuid = cuids[_classToIndex(line["class"])]
            textWithoutMarkup = _removeMarkup(text)

            if cuid not in CUID_to_expansion:
                acronymExpansions = acronymExpander.expand(
                    acronym, [], textWithoutMarkup)
                # An "expansion" equal to the acronym carries no information,
                # so keep looking in later rows for this CUID.
                if (acronymExpansions
                        and acronymExpansions[0].expansion != acronym):
                    CUID_to_expansion[cuid] = acronymExpansions[0].expansion

            if pmid not in articleDB:
                articleDB[pmid] = textWithoutMarkup
            cuid_and_pmid.append([cuid, pmid])

        if acronym in acronymDB:
            # Entries for the duplicate are still appended to the existing list.
            common_logger.error("acronym already present in acronymDB")
        else:
            acronymDB[acronym] = []

        # Resolved only after the whole file is read, so an expansion found in
        # a later row also applies to earlier rows with the same CUID.
        for cuid, pmid in cuid_and_pmid:
            if cuid in CUID_to_expansion:
                acronymDB[acronym].append([CUID_to_expansion[cuid], pmid, 0])
            else:
                common_logger.error(
                    "Expansion not found for CUID %s of %s" % (cuid, acronym))
                acronymDB[acronym].append([cuid, pmid, 0])

    return acronymDB, articleDB
def internal_server_error(e):
    """Log the error and return the rendered error page with code 500.

    NOTE(review): presumably registered as a web-framework error handler
    (the registration is not visible here) -- confirm.

    Args:
        e: the error being handled; passed as-is to ``common_logger.error``.

    Returns:
        tuple: ``(rendered_page, 500)`` where the page is rendered from
        ``string_constants.file_errorpage``.
    """
    common_logger.error(e)
    error_page = render_template(string_constants.file_errorpage)
    return error_page, 500