def createWordLookup(self, foreignSentence):
    """Tokenize a sentence and store per-token metadata on the instance.

    The sentence is split on runs of non-word characters. Because the
    pattern uses a capturing group, the separators themselves (spaces,
    punctuation) are kept as tokens, interleaved with the words. A sentence
    that starts with a separator therefore yields a leading empty token,
    which is preserved.

    For every token a dict is built with the keys:
        'originalToken': the token text,
        'spanish_POS':   the value of ``Corpus().spanishTags()`` for that
                         token, or None when the token is not present,
        'upper':         True when the token's first character is
                         uppercase, False otherwise (including for an
                         empty token).

    Args:
        foreignSentence: the sentence to tokenize, either as a UTF-8
            encoded byte string or as already-decoded text.

    Returns:
        None. The resulting list of dicts is stored in
        ``self.tokenDictList``.
    """
    corpus = Corpus()

    # Decode only real byte strings; already-decoded text passes through
    # unchanged instead of raising.
    if isinstance(foreignSentence, bytes):
        foreignSentence = foreignSentence.decode('utf-8')

    # The capturing group makes split() return the separators as well.
    spanishTokens = re.compile(r'(\W+)', re.UNICODE).split(foreignSentence)

    # split() leaves a trailing '' when the sentence ends with a separator
    # (or is empty). Drop only that artifact: popping unconditionally would
    # discard the last real word of a sentence with no final punctuation.
    if spanishTokens and not spanishTokens[-1]:
        spanishTokens.pop()

    # Fetched once rather than once per token.
    # NOTE(review): assumes spanishTags() returns an equivalent mapping on
    # every call -- confirm against Corpus.
    spanishTags = corpus.spanishTags()

    self.tokenDictList = [
        {
            'originalToken': token,
            'spanish_POS': spanishTags.get(token, None),
            # Slicing keeps this safe for an empty token: ''.isupper() is
            # False.
            'upper': token[:1].isupper(),
        }
        for token in spanishTokens
    ]