def __init__(self, term, role, synonyms, tree_position):
    """Initialize the term and record its tree position.

    term, role and synonyms are forwarded unchanged to Term.__init__.
    tree_position must be a set (instances of set subclasses are
    accepted as well).

    Raises TypeError if tree_position is not a set.
    """
    Term.__init__(self, term, role, synonyms)
    # The tree position must always be a set
    if not isinstance(tree_position, set):
        # Interpolate explicitly: unlike logging calls, exception
        # constructors do not apply %-formatting to extra arguments.
        raise TypeError(
            "The tree position must be a set, but I received "
            "a %r instead (which is a %r)" %
            (tree_position, type(tree_position)))
    self.__tree_position = tree_position
def __init__(self, term, role, synonyms, tree_position):
    """Build the underlying Term and store the tree position on top of it.

    term, role and synonyms are passed straight through to Term.__init__.
    tree_position has to be a set (a set subclass instance also passes).

    Raises TypeError when tree_position is anything other than a set.
    """
    Term.__init__(self, term, role, synonyms)
    # The tree position must always be a set
    if not isinstance(tree_position, set):
        # The message is formatted here because TypeError would otherwise
        # just store the template and its arguments as a raw tuple.
        message = ("The tree position must be a set, but I received "
                   "a %r instead (which is a %r)" %
                   (tree_position, type(tree_position)))
        raise TypeError(message)
    self.__tree_position = tree_position
def check_for_subheadings(self, an_expression):
    """Return the subheading terms of the first rule this expression matches.

    The tree position of every term in the expression's utterance is looked
    up in self._tree, and each position is tested against every rule in
    self.data.subheading_rules, in order.  A rule matches a position when
    at least one of its 'in' entries is contained in one of the position's
    members and none of its 'not in' entries is.  The 'terms' of the first
    matching rule are returned as a list of Term objects; an empty list
    means no rule matched.
    """
    tree_positions = []
    for member in an_expression.utterance:
        tree_positions.append(self._tree[member.term].position)

    for tree_position in tree_positions:
        for rule in self.data.subheading_rules:
            # A rule without an 'in' clause can never match; a rule without
            # a 'not in' clause excludes nothing.
            included = 'in' in rule and any(
                [any([fragment in branch for fragment in rule['in']])
                 for branch in tree_position])
            excluded = 'not in' in rule and any(
                [any([fragment in branch for fragment in rule['not in']])
                 for branch in tree_position])
            if not included or excluded:
                continue
            logging.log(ULTRADEBUG,
                        "Expression %r matches subheading rule %r",
                        an_expression, rule)
            return [Term(name) for name in rule['terms']]
    return []
def check_extra_checktag_rules(self, an_expression):
    """Add extra checktags for the first tree-based rule the expression hits.

    The tree position of every term in the expression's utterance is looked
    up in self._tree and tested against each rule in
    self.data.extra_checktag_rules, in order.  A rule matches a position
    when at least one of its 'in' entries is contained in one of the
    position's members and none of its 'not in' entries is.  On the first
    match the rule's 'terms' are merged, as Term objects, into
    self._extra_checktags and the search stops.  Returns None either way.
    """
    tree_positions = []
    for member in an_expression.utterance:
        tree_positions.append(self._tree[member.term].position)

    for tree_position in tree_positions:
        for rule in self.data.extra_checktag_rules:
            # A rule without an 'in' clause can never match; a rule without
            # a 'not in' clause excludes nothing.
            included = 'in' in rule and any(
                [any([fragment in branch for fragment in rule['in']])
                 for branch in tree_position])
            excluded = 'not in' in rule and any(
                [any([fragment in branch for fragment in rule['not in']])
                 for branch in tree_position])
            if included and not excluded:
                logging.log(ULTRADEBUG,
                            "Expression %r matches checktag rule %r",
                            an_expression, rule)
                self._extra_checktags |= set(
                    Term(name) for name in rule['terms'])
                # Only the first matching rule is applied.
                return
def from_medline(self, medline_string):
    """Populate this Expression from a MEDLINE record headings field.

    The field is stripped of surrounding whitespace and split on '/'.
    Each piece becomes a Term with its stars removed; pieces that carried
    a '*' get the role '*', all others get the empty role.  No lowercasing
    is performed here.  The unmodified input is kept in
    self._original_utterance.

    Returns self so that calls can be chained.
    """
    # The star test has to look at each piece *before* its stars are
    # removed; stripping them from the whole string first would leave every
    # term with an empty role.
    pieces = medline_string.strip().split('/')
    self._utterance = [
        Term(piece.replace('*', ''), '*' if '*' in piece else "")
        for piece in pieces
    ]
    self._original_utterance = medline_string
    return self  # For method chaining
def major_heading(self):
    """Return the starred (major) headings of the original utterance.

    self._original_utterance may be a list of heading strings or a single
    '/'-separated string.  Every heading containing a '*' is returned as a
    Term with the stars removed and the role '*'.  The result is always a
    list; it is empty when no heading is starred.
    """
    source = self._original_utterance
    if isinstance(source, list):
        headings = source
    else:
        headings = source.strip().split('/')

    majors = []
    for heading in headings:
        if '*' not in heading:
            continue
        # Only starred headings reach this point, so the role is always '*'.
        majors.append(Term(heading.replace('*', ''), '*'))
    return majors
def check_checktag_rules(self, CUI):
    """Add the checktags of every checktag-rule list that contains CUI.

    self.data.checktag_rules maps a list name to the checktags it implies.
    For each list name other than '_exclusions', the CUI is looked up in
    self.data.lists[listname]; on a hit the associated checktags are
    merged, as Term objects, into self._extra_checktags.
    """
    for listname, checktags in self.data.checktag_rules.iteritems():
        # The '_exclusions' entry is skipped rather than tested for
        # membership.
        if listname != '_exclusions' and CUI in self.data.lists[listname]:
            logging.log(ULTRADEBUG,
                        'CUI %r matches list %r. Checktags %r added.',
                        CUI, listname, checktags)
            self._extra_checktags |= set(Term(tag) for tag in checktags)
def convert_step_2(self, umls_concept):
    """Converts a UMLS concept into a MeSH expression.

    The concept's names_and_ids are resolved by the first rule that
    applies:

    1. Mapping method not in self._accepted_types: all names if there is
       exactly one, otherwise an empty Expression.
    2. Mapping method 'a' (associated expression, i.e. compound mapping):
       all names.
    3. Exactly one name: that name.
    4. Exactly one descriptor: that descriptor.
    5. Exactly one descriptor equal to the concept name: the concept name.
    6. All descriptors and qualifiers share one text: that text.
    7. Otherwise: whatever self._tree.deepest_of_list selects among all
       names.

    Always returns an Expression built from a list of Term objects.
    """
    names_and_ids = umls_concept.names_and_ids

    if umls_concept.mapping_method not in self._accepted_types:
        # Unless there's only a single concept as a target
        if len(names_and_ids) == 1:
            return Expression(
                [Term(x) for x in names_and_ids.itervalues()])
        return Expression([])

    # Concepts mapped with mapping method 'a' are associated expressions,
    # i.e. compound mappings
    if umls_concept.mapping_method == 'a':
        return Expression([Term(x) for x in names_and_ids.itervalues()])

    # Now only the synonyms remain. Is there just one item? Return it.
    if len(names_and_ids) == 1:
        return Expression([Term(x) for x in names_and_ids.itervalues()])

    # Is there just one descriptor? Return it.
    descriptors = [
        names_and_ids[x] for x in names_and_ids.iter_descriptors()
    ]
    if len(descriptors) == 1:
        return Expression([Term(x) for x in descriptors])

    # Is there one descriptor identical to the name? Return the concept
    # name.
    if len([x for x in descriptors
            if x == umls_concept.concept_name]) == 1:
        return Expression([Term(umls_concept.concept_name)])

    # Do all qualifiers and descriptors share the same text?
    qualifiers = [
        names_and_ids[x] for x in names_and_ids.iter_qualifiers()
    ]
    unique_names = set(descriptors) | set(qualifiers)
    if len(unique_names) == 1:
        # Return the only name, wrapped in a list like every other
        # Expression built in this method.
        return Expression([Term(unique_names.pop())])

    # I give up; return the deepest item according to the tree.
    deepest = self._tree.deepest_of_list(names_and_ids.itervalues())
    return Expression([Term(deepest)])
def __setstate__(self, state):
    """Restore the instance from a state mapping.

    The Term part of the state is restored by Term.__setstate__, after
    which the tree position is read back from the state's 'pos' entry.
    """
    Term.__setstate__(self, state)
    restored_position = state['pos']
    self.__tree_position = restored_position
def __getstate__(self):
    """Return the Term state extended with the tree position under 'pos'."""
    base_state = Term.__getstate__(self)
    base_state['pos'] = self.__tree_position
    return base_state
def testBasicConversion(self):
    """Check the result type and first term of a ruleless conversion.

    Converts the set returned by self.buildRankedResultSet() with
    self.ruleless_converter, then checks that the output is exactly a
    RankedConversionResult and that the first term of the first
    expression of its first item is 'hareitosis'.
    """
    output = self.ruleless_converter.convert(self.buildRankedResultSet())
    # assertTrue replaces the deprecated assert_ alias; the exact-type
    # comparison is kept as it was.
    self.assertTrue(type(output) is RankedConversionResult)
    results = list(output)
    self.assertEqual(Term('hareitosis'), results[0][0].utterance[0])
def __getstate__(self):
    """Build the state to store: Term's own state plus a 'pos' entry.

    The 'pos' entry holds this instance's tree position.
    """
    term_state = Term.__getstate__(self)
    term_state['pos'] = self.__tree_position
    return term_state
def __setstate__(self, state):
    """Rebuild the instance from a previously stored state.

    Term.__setstate__ restores the inherited part first; the tree position
    is then taken from the 'pos' entry of the same state.
    """
    Term.__setstate__(self, state)
    stored_position = state['pos']
    self.__tree_position = stored_position
def convert_step_2(self, umls_concept):
    """Return an Expression with one Term per value of names_and_ids.

    Every value yielded by umls_concept.names_and_ids.itervalues() is
    wrapped in a Term, in iteration order; no filtering is applied.
    """
    terms = []
    for name in umls_concept.names_and_ids.itervalues():
        terms.append(Term(name))
    return Expression(terms)