def parseSurfaceSemantics(sss_str):
    """Resolve a surface-semantics string into a list of pywordnet senses.

    ``sss_str`` is split by ``splitSurfaceSemantics`` into a word, a part of
    speech and a sequence of 1-based sense numbers.  The tagged part of
    speech is tried first; if the word or one of the sense numbers cannot
    be found under it (``KeyError`` / ``IndexError``), each of the other
    parts of speech in ('N', 'V', 'ADJ', 'ADV') is tried in turn.

    Returns the list of sense objects for the first part of speech that
    resolves every sense number, or ``[]`` when ``sss_str`` contains no
    '_' or no part of speech works.
    """
    if '_' not in sss_str:
        return []
    text, POS, senses = splitSurfaceSemantics(sss_str)
    # Tagged POS first, then the alternatives.  The fallback has to query the
    # *alternative* POS; retrying the tagged one can only fail the same way.
    candidates = [POS] + [alt for alt in ('N', 'V', 'ADJ', 'ADV') if alt != POS]
    for candidatePOS in candidates:
        try:
            wordSenses = pywordnet.getWord(text, candidatePOS).getSenses()
            return [wordSenses[int(s) - 1] for s in senses]
        except (IndexError, KeyError):
            continue
    return []
def parseSurfaceSemantics(sss_str):
    """Resolve a surface-semantics string into a list of pywordnet senses.

    ``sss_str`` is split by ``splitSurfaceSemantics`` into a word, a part of
    speech and a sequence of 1-based sense numbers.  The tagged part of
    speech is tried first; if the word or one of the sense numbers cannot
    be found under it (``KeyError`` / ``IndexError``), each of the other
    parts of speech in ("N", "V", "ADJ", "ADV") is tried in turn.

    Returns the list of sense objects for the first part of speech that
    resolves every sense number, or ``[]`` when ``sss_str`` contains no
    "_" or no part of speech works.
    """
    if "_" not in sss_str:
        return []
    text, POS, senses = splitSurfaceSemantics(sss_str)
    # Tagged POS first, then the alternatives.  The fallback has to query the
    # *alternative* POS; retrying the tagged one can only fail the same way.
    candidates = [POS] + [alt for alt in ("N", "V", "ADJ", "ADV") if alt != POS]
    for candidatePOS in candidates:
        try:
            wordSenses = pywordnet.getWord(text, candidatePOS).getSenses()
            return [wordSenses[int(s) - 1] for s in senses]
        except (IndexError, KeyError):
            continue
    return []
def extractSurfaceSemantics(token,parent):
    """Build the ``text_POS_s1,s2,...`` surface-semantics string for a token.

    The part of speech comes from ``getPartOfSpeech(token, parent)``.  For
    content words (N, V, ADV, ADJ) the candidate senses are looked up in the
    following order, each step running only when the previous one raised
    ``KeyError``:

    1. the module-level ``Senses`` table, keyed by the lower-cased text;
    2. every sense number pywordnet lists for the word under that POS;
    3. ``Senses`` again, after dropping a trailing 's' or, failing that,
       using ``token['STEM']`` (presumably filled in by the stemmer call);
    4. enchant spelling suggestions, preferring one that is in ``Senses``.

    Args:
        token: mapping with a 'TEXT' entry.  Its 'POS' entry is overwritten
            when the senses found belong to a different part of speech.
        parent: passed through to ``getPartOfSpeech`` unchanged.

    Returns:
        The joined ``text_POS_senses`` string on success; the upper-cased
        token text when every lookup ends in ``KeyError`` or a 'see'
        redirect cannot be followed.  As written here the function falls
        off the end (returning ``None``) when the POS is not a content POS,
        when no senses at all were found, or after an unexpected error has
        been logged.
    """
    POS = getPartOfSpeech(token, parent)
    tokenSenses = {}
    text = token['TEXT'].lower()
    default = token['TEXT'].upper()
    if POS in ['N', 'V', 'ADV', 'ADJ']:
        try:
            # TODO (from the original author): redo this nested fallback
            # chain as a loop over lookup strategies.
            tokenSenses = Senses[text]
        except KeyError:
            logger.warning('extractSurfaceSemantics : Text not in tagged senses: %s', text)
            try:
                # Untagged word: offer every sense number WordNet has for it.
                tokenSenses = {POS: range(1, len(pywordnet.getWord(text, POS).getSenses()) + 1)}
            except KeyError:
                try:
                    logger.warning('extractSurfaceSemantics : Inflected version of WordNet word? %s', text)
                    if text.endswith('s'):
                        text = text[:-1]
                        tokenSenses = Senses[text]
                    else:
                        stemmer = PorterStemmer()  # Update WordNetStemmer to NLTK 1.4 API
                        stemmer.stem(token)
                        text = token['STEM']
                        tokenSenses = Senses[text]
                except KeyError:
                    # Undo the plural/stem guess before trying spelling fixes.
                    text = token['TEXT'].lower()
                    try:
                        logger.warning('extractSurfaceSemantics : Misspelling / typo of WordNet word? %s', text)
                        spellchecker = enchant.DictWithPWL('en_US', Lexicon)
                        # Ask for suggestions once; they are needed up to
                        # three times below.
                        suggestions = spellchecker.suggest(text)
                        s = ''
                        for s in suggestions:
                            if s in Senses:
                                tokenSenses = Senses[s]
                                break
                        if not tokenSenses and suggestions:
                            # No suggestion is tagged: take the first one and
                            # offer all of its WordNet senses.
                            s = suggestions[0]
                            tokenSenses = {POS: range(1, len(pywordnet.getWord(s, POS).getSenses()) + 1)}
                        if s and Options.Spellcheck:
                            logger.warning('extractSurfaceSemantics : Found spelling correction %s for %s', s, text)
                            text = s
                    except KeyError:
                        logger.error('extractSurfaceSemantics : Unknown token: %s', text)
                        return default
        # Handle experienced typos.
        if 'see' in tokenSenses:
            # FIXME (from the original author): adding to dict for typos
            # that are other words.
            text = tokenSenses['see']
            try:
                tokenSenses = Senses[text]
            except Exception:
                # Still best-effort, but no longer swallows
                # KeyboardInterrupt / SystemExit like a bare except did.
                return default
        # Handle morphology variants that wordnet understands.
        elif isinstance(tokenSenses, tuple):
            # NOTE(review): a tuple cannot be indexed by the POS string, so
            # this statement raises TypeError if the branch is ever taken.
            # The intended shape of the entry is not visible here; left
            # unchanged until that is confirmed.
            text, tokenSenses[POS] = tokenSenses[POS]
        try:
            return '_'.join([text, POS, ','.join([str(i) for i in tokenSenses[POS]])])
        except KeyError:
            # Nothing under the expected POS: fall back to the first POS the
            # entry does list, and record it on the token.
            if tokenSenses:
                POS, fallbackSenses = next(iter(tokenSenses.items()))
                token['POS'] = POS
                return '_'.join([text, POS, ','.join([str(i) for i in fallbackSenses])])
        except Exception as e:
            logger.error('extractSurfaceSemantics: %s: Could not find sense %s for token %s', e, POS, token)
def extractSurfaceSemantics(token, parent):
    """Build the ``text_POS_s1,s2,...`` surface-semantics string for a token.

    The part of speech comes from ``getPartOfSpeech(token, parent)``.  For
    content words (N, V, ADV, ADJ) the candidate senses are looked up in the
    following order, each step running only when the previous one raised
    ``KeyError``:

    1. the module-level ``Senses`` table, keyed by the lower-cased text;
    2. every sense number pywordnet lists for the word under that POS;
    3. ``Senses`` again, after dropping a trailing "s" or, failing that,
       using ``token["STEM"]`` (presumably filled in by the stemmer call);
    4. enchant spelling suggestions, preferring one that is in ``Senses``.

    Args:
        token: mapping with a "TEXT" entry.  Its "POS" entry is overwritten
            when the senses found belong to a different part of speech.
        parent: passed through to ``getPartOfSpeech`` unchanged.

    Returns:
        The joined ``text_POS_senses`` string on success; the upper-cased
        token text when every lookup ends in ``KeyError`` or a "see"
        redirect cannot be followed.  As written here the function falls
        off the end (returning ``None``) when the POS is not a content POS,
        when no senses at all were found, or after an unexpected error has
        been logged.
    """
    POS = getPartOfSpeech(token, parent)
    tokenSenses = {}
    text = token["TEXT"].lower()
    default = token["TEXT"].upper()
    if POS in ["N", "V", "ADV", "ADJ"]:
        try:
            # TODO (from the original author): redo this nested fallback
            # chain as a loop over lookup strategies.
            tokenSenses = Senses[text]
        except KeyError:
            logger.warning("extractSurfaceSemantics : Text not in tagged senses: %s", text)
            try:
                # Untagged word: offer every sense number WordNet has for it.
                tokenSenses = {POS: range(1, len(pywordnet.getWord(text, POS).getSenses()) + 1)}
            except KeyError:
                try:
                    logger.warning("extractSurfaceSemantics : Inflected version of WordNet word? %s", text)
                    if text.endswith("s"):
                        text = text[:-1]
                        tokenSenses = Senses[text]
                    else:
                        stemmer = PorterStemmer()  # Update WordNetStemmer to NLTK 1.4 API
                        stemmer.stem(token)
                        text = token["STEM"]
                        tokenSenses = Senses[text]
                except KeyError:
                    # Undo the plural/stem guess before trying spelling fixes.
                    text = token["TEXT"].lower()
                    try:
                        logger.warning("extractSurfaceSemantics : Misspelling / typo of WordNet word? %s", text)
                        spellchecker = enchant.DictWithPWL("en_US", Lexicon)
                        # Ask for suggestions once; they are needed up to
                        # three times below.
                        suggestions = spellchecker.suggest(text)
                        s = ""
                        for s in suggestions:
                            if s in Senses:
                                tokenSenses = Senses[s]
                                break
                        if not tokenSenses and suggestions:
                            # No suggestion is tagged: take the first one and
                            # offer all of its WordNet senses.
                            s = suggestions[0]
                            tokenSenses = {POS: range(1, len(pywordnet.getWord(s, POS).getSenses()) + 1)}
                        if s and Options.Spellcheck:
                            logger.warning("extractSurfaceSemantics : Found spelling correction %s for %s", s, text)
                            text = s
                    except KeyError:
                        logger.error("extractSurfaceSemantics : Unknown token: %s", text)
                        return default
        # Handle experienced typos.
        if "see" in tokenSenses:
            # FIXME (from the original author): adding to dict for typos
            # that are other words.
            text = tokenSenses["see"]
            try:
                tokenSenses = Senses[text]
            except Exception:
                # Still best-effort, but no longer swallows
                # KeyboardInterrupt / SystemExit like a bare except did.
                return default
        # Handle morphology variants that wordnet understands.
        elif isinstance(tokenSenses, tuple):
            # NOTE(review): a tuple cannot be indexed by the POS string, so
            # this statement raises TypeError if the branch is ever taken.
            # The intended shape of the entry is not visible here; left
            # unchanged until that is confirmed.
            text, tokenSenses[POS] = tokenSenses[POS]
        try:
            return "_".join([text, POS, ",".join([str(i) for i in tokenSenses[POS]])])
        except KeyError:
            # Nothing under the expected POS: fall back to the first POS the
            # entry does list, and record it on the token.
            if tokenSenses:
                POS, fallbackSenses = next(iter(tokenSenses.items()))
                token["POS"] = POS
                return "_".join([text, POS, ",".join([str(i) for i in fallbackSenses])])
        except Exception as e:
            logger.error(
                "extractSurfaceSemantics: %s: Could not find sense %s for token %s", e, POS, token
            )