Beispiel #1
0
 def __init__(self, term, role, synonyms, tree_position):
     """Initialise the term and store its tree position.

     term, role and synonyms are forwarded unchanged to Term.__init__.
     tree_position must be a set (subclasses of set are accepted).

     Raises TypeError when tree_position is not a set.
     """
     Term.__init__(self, term, role, synonyms)
     # The tree position must always be a set
     if not isinstance(tree_position, set):
         # Interpolate with % so the exception carries a readable message
         # rather than a (format string, value, type) argument tuple.
         raise TypeError("The tree position must be a set, but I received "
                         "a %r instead (which is a %r)" %
                         (tree_position, type(tree_position)))
     self.__tree_position = tree_position
Beispiel #2
0
 def __init__(self, term, role, synonyms, tree_position):
     """Create the term and record the set describing its tree position.

     The first three arguments go straight to Term.__init__; tree_position
     is validated and then kept on the instance.

     Raises TypeError if tree_position is not a set (or a set subclass).
     """
     Term.__init__(self, term, role, synonyms)
     # The tree position must always be a set
     if not isinstance(tree_position, set):
         # The message is formatted here; passing the values as extra
         # exception arguments would leave the %r placeholders unfilled.
         message = ("The tree position must be a set, but I received "
                    "a %r instead (which is a %r)")
         raise TypeError(message % (tree_position, type(tree_position)))
     self.__tree_position = tree_position
Beispiel #3
0
 def check_for_subheadings(self, an_expression):
     """Checks to see if this expression needs a subheading added.

     The position of every term in an_expression.utterance is looked up in
     self._tree and tested against each rule in self.data.subheading_rules.
     A rule matches a position when some entry x of rule['in'] satisfies
     "x in y" for a member y of the position, and no entry of
     rule['not in'] does. A rule without an 'in' key never matches.

     Returns a list of Term objects built from the 'terms' of the first
     matching rule, or an empty list when nothing matches.
     """
     positions = [
         self._tree[a_term.term].position
         for a_term in an_expression.utterance
     ]
     for position in positions:
         for rule in self.data.subheading_rules:
             # Check each position in each tree for membership in the
             # tree-based subheading rules. Generator expressions let any()
             # stop at the first hit instead of building throwaway lists.
             in_rule = 'in' in rule and any(
                 x in y for y in position for x in rule['in'])
             not_in_rule = 'not in' in rule and any(
                 x in y for y in position for x in rule['not in'])
             if in_rule and not not_in_rule:
                 logging.log(ULTRADEBUG,
                             "Expression %r matches subheading rule %r",
                             an_expression, rule)
                 return [Term(x) for x in rule['terms']]
     return []
Beispiel #4
0
 def check_extra_checktag_rules(self, an_expression):
     """Checks to see if a mesh term is in a known checktag-emitting
     tree.

     The position of every term in an_expression.utterance is looked up in
     self._tree and tested against each rule in
     self.data.extra_checktag_rules. A rule matches a position when some
     entry x of rule['in'] satisfies "x in y" for a member y of the
     position, and no entry of rule['not in'] does. A rule without an 'in'
     key never matches.

     Every matching rule adds Term objects built from its 'terms' to
     self._extra_checktags. Nothing is returned.
     """
     positions = [
         self._tree[a_term.term].position
         for a_term in an_expression.utterance
     ]
     for position in positions:
         for rule in self.data.extra_checktag_rules:
             # Check each position in each tree for membership in the
             # tree-based checktag rules. Generator expressions let any()
             # stop at the first hit instead of building throwaway lists.
             in_rule = 'in' in rule and any(
                 x in y for y in position for x in rule['in'])
             not_in_rule = 'not in' in rule and any(
                 x in y for y in position for x in rule['not in'])
             if in_rule and not not_in_rule:
                 logging.log(ULTRADEBUG,
                             "Expression %r matches checktag rule %r",
                             an_expression, rule)
                 self._extra_checktags |= set(
                     Term(x) for x in rule['terms'])
Beispiel #5
0
 def from_medline(self, medline_string):
     """Reformats a MEDLINE record headings field into an Expression.

     The field is stripped of surrounding whitespace and split on '/'.
     Each piece becomes a Term whose text has its stars removed; a piece
     that carried a star gets the role '*', every other piece the empty
     role. The unmodified input is kept in self._original_utterance.

     Returns self so that calls can be chained.

     NOTE(review): the previous docstring also mentioned lowercasing, but
     nothing in this method lowercases -- presumably Term does; confirm.
     """
     # Split first and strip the stars per piece. Removing them from the
     # whole string up front made the "'*' in x" test always false, so no
     # term ever received the '*' role.
     self._utterance = [
         Term(x.replace('*', ""), '*' if '*' in x else "")
         for x in medline_string.strip().split('/')
     ]
     self._original_utterance = medline_string
     return self  # For method chaining
Beispiel #6
0
 def major_heading(self):
     """Collect the starred (major) headings of the original utterance.

     self._original_utterance is used as-is when it is a list; otherwise it
     is stripped and split on '/'. Every piece containing a star yields a
     Term with the stars removed and the role '*'.

     Returns the list of those Terms, which is empty when no piece is
     starred.
     """
     pieces = self._original_utterance
     if not isinstance(pieces, list):
         pieces = pieces.strip().split('/')
     majors = []
     for piece in pieces:
         if '*' in piece:
             # Only starred pieces get here, so the role is always '*'.
             majors.append(Term(piece.replace('*', ''), '*'))
     return majors
Beispiel #7
0
 def check_checktag_rules(self, CUI):
     """Add the checktags of every rule list that contains the given CUI.

     Each (listname, checktags) pair of self.data.checktag_rules is
     examined, except the '_exclusions' entry. When CUI is a member of
     self.data.lists[listname], the checktags are wrapped in Term objects
     and merged into self._extra_checktags. Nothing is returned.
     """
     known_lists = self.data.lists
     for entry in self.data.checktag_rules.iteritems():
         listname, checktags = entry
         # Every list is checked for membership, except the exclusions.
         if listname != '_exclusions' and CUI in known_lists[listname]:
             logging.log(ULTRADEBUG,
                         'CUI %r matches list %r. Checktags %r added.', CUI,
                         listname, checktags)
             new_tags = [Term(tag) for tag in checktags]
             self._extra_checktags |= set(new_tags)
Beispiel #8
0
 def convert_step_2(self, umls_concept):
     """Converts a UMLS concept into a MeSH expression.

     The rules are tried in this order:

     1. Mapping method not in self._accepted_types: return the single name
        if there is exactly one, otherwise an empty Expression.
     2. Mapping method 'a' (compound mapping): return all names.
     3. Exactly one name: return it.
     4. Exactly one descriptor: return it.
     5. Exactly one descriptor equal to the concept name: return the
        concept name.
     6. All descriptors and qualifiers share one text: return that text.
     7. Otherwise return whatever self._tree.deepest_of_list picks from
        the names.

     Always returns an Expression built from a list of Term objects.
     """
     names_and_ids = umls_concept.names_and_ids
     if umls_concept.mapping_method not in self._accepted_types:
         # Unless there's only a single concept as a target
         if len(names_and_ids) == 1:
             return Expression(
                 [Term(x) for x in names_and_ids.itervalues()])
         return Expression([])
     # Concepts mapped with mapping method "A" are associated expressions,
     # i.e. compound mappings
     # NOTE(review): the comment says "A" but the comparison uses a
     # lowercase 'a' -- confirm which one the data actually carries.
     if umls_concept.mapping_method == 'a':
         return Expression(
             [Term(x) for x in names_and_ids.itervalues()])
     # Now only the synonyms remain. Is there just one item? Return it.
     if len(names_and_ids) == 1:
         return Expression(
             [Term(x) for x in names_and_ids.itervalues()])
     # Is there just one descriptor? Return it.
     descriptors = [
         names_and_ids[x] for x in names_and_ids.iter_descriptors()
     ]
     if len(descriptors) == 1:
         return Expression([Term(x) for x in descriptors])
     # Is there one descriptor identical to the name? Return the concept
     # name.
     if len([x for x in descriptors
             if x == umls_concept.concept_name]) == 1:
         return Expression([Term(umls_concept.concept_name)])
     # Do all qualifiers and descriptors share the same text?
     qualifiers = [
         names_and_ids[x] for x in names_and_ids.iter_qualifiers()
     ]
     unique_names = set(descriptors) | set(qualifiers)
     if len(unique_names) == 1:
         # Return the only name, wrapped in a list like every other
         # Expression built here.
         return Expression([Term(unique_names.pop())])
     # I give up; return the deepest item according to the tree.
     deepest = self._tree.deepest_of_list(names_and_ids.itervalues())
     return Expression([Term(deepest)])  # Return the term
Beispiel #9
0
 def __setstate__(self, state):
     """Restore the Term state, then the tree position kept under 'pos'."""
     Term.__setstate__(self, state)
     restored_position = state['pos']
     self.__tree_position = restored_position
Beispiel #10
0
 def __getstate__(self):
     """Return Term's state with the tree position added under 'pos'."""
     pickled = Term.__getstate__(self)
     position = self.__tree_position
     pickled['pos'] = position
     return pickled
 def testBasicConversion(self):
     """The rule-less converter returns a RankedConversionResult whose
     first entry starts with the term 'hareitosis'."""
     output = self.ruleless_converter.convert(self.buildRankedResultSet())
     # assertTrue replaces assert_, a deprecated alias that newer unittest
     # versions no longer provide.
     self.assertTrue(type(output) is RankedConversionResult)
     output = list(output)
     self.assertEqual(Term('hareitosis'), output[0][0].utterance[0])
Beispiel #12
0
 def __getstate__(self):
     """Build the state to pickle.

     Starts from whatever Term.__getstate__ returns and stores this
     object's tree position in it under the key 'pos'.
     """
     base_state = Term.__getstate__(self)
     base_state['pos'] = self.__tree_position
     return base_state
Beispiel #13
0
 def __setstate__(self, state):
     """Rebuild the object from a pickled state.

     Term.__setstate__ handles the inherited part first; the tree position
     is then read back from the 'pos' entry of the state.
     """
     Term.__setstate__(self, state)
     tree_position = state['pos']
     self.__tree_position = tree_position
 def convert_step_2(self, umls_concept):
     """Return an Expression holding one Term per name of the concept."""
     terms = []
     for name in umls_concept.names_and_ids.itervalues():
         terms.append(Term(name))
     return Expression(terms)