from unicodedata import category as unicat  # used by every variant below


def clean_tagged_text(self, tagged_text):
    """
    Remove punctuation from tagged text.
    """
    # A token counts as punctuation only if every character is punctuation
    # and is not a comma, so comma tokens are deliberately retained.
    punct_tagged = lambda word: all(
        unicat(char).startswith("P") and char != ","
        for char in word
    )
    cleaned = filter(lambda t: not punct_tagged(t[0]), tagged_text)
    return list(cleaned)
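# A quick sanity check of the comma exception, assuming the def above is
# reachable at module scope (in the original it is a method on a class, so
# the unused self is passed as None here); the input is hypothetical.
tagged = [("Call", "VB"), ("me", "PRP"), (",", ","), ("maybe", "RB"), ("!", ".")]
print(clean_tagged_text(None, tagged))
# -> [('Call', 'VB'), ('me', 'PRP'), (',', ','), ('maybe', 'RB')]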
def normalize(sent):
    # Removes punctuation from a tokenized/tagged sentence and lowercases.
    is_punct = lambda word: all(unicat(c).startswith('P') for c in word)
    sent = filter(lambda t: not is_punct(t[0]), sent)
    sent = map(lambda t: (t[0].lower(), t[1]), sent)
    return list(sent)
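# Example call on a hypothetical tagged sentence: the full stop is dropped
# and only the word part of each (word, tag) pair is lowercased.
tagged_sent = [("The", "DT"), ("quick", "JJ"), ("fox", "NN"), (".", ".")]
print(normalize(tagged_sent))
# -> [('the', 'DT'), ('quick', 'JJ'), ('fox', 'NN')]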
def normalize(self, sent):
    """
    Removes punctuation from a tokenized/tagged sentence and lowercases words.
    """
    is_punct = lambda word: all(unicat(char).startswith('P') for char in word)
    sent = filter(lambda t: not is_punct(t[0]), sent)
    sent = map(lambda t: (t[0].lower(), t[1]), sent)
    return list(sent)
def normalize(sent):
    """
    Tokenizes a raw tweet, removes URL and punctuation tokens, and
    lowercases words.
    """
    # tweet_tokenizer is assumed to be defined elsewhere, e.g. an instance
    # of nltk.tokenize.TweetTokenizer.
    sent = tweet_tokenizer.tokenize(sent)
    sent = [x for x in sent if 'http' not in x]
    is_punct = lambda word: all(unicat(char).startswith('P') for char in word)
    # Tokens here are plain strings, so t[0] is the first character: any token
    # whose first character is punctuation (including hashtags) is dropped.
    sent = filter(lambda t: not is_punct(t[0]), sent)
    sent = map(lambda t: t.lower(), sent)
    return list(sent)
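# Example with a hypothetical tweet, assuming tweet_tokenizer is nltk's
# TweetTokenizer; the URL, the '!', and the hashtag are all removed.
from nltk.tokenize import TweetTokenizer

tweet_tokenizer = TweetTokenizer()
print(normalize("Loving this library! https://example.com #nlp"))
# -> ['loving', 'this', 'library']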
def normalize(self, sent):
    """
    Removes punctuation from a tokenized/tagged sentence and lowercases words.
    """
    is_punct = lambda word: all(unicat(char).startswith('P') for char in word)
    sent = filter(lambda t: not is_punct(t[0]), sent)
    sent = list(sent)
    # Unlike the variants above, this one expects a single (token, tag) pair:
    # if both elements survive the punctuation filter, it returns a one-item
    # list with the token lowercased; otherwise it returns an empty list.
    if len(sent) == 2:
        sent = map(lambda t: (t[0].lower(), t[1]), [sent])
        sent = list(sent)
    else:
        sent = list()
    return sent
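# Example calls, assuming the def above is reachable at module scope (the
# unused self is passed as None); the inputs are hypothetical single pairs.
print(normalize(None, ("Hello", "UH")))   # -> [('hello', 'UH')]
print(normalize(None, ("!", ".")))        # -> []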