Example #1
 # Assumes: from unicodedata import category as unicat
 def clean_tagged_text(self, tagged_text):
     """
     Remove punctuation tokens from tagged text, keeping commas.
     """
     # A token counts as punctuation when every character is in a Unicode
     # "P" (punctuation) category; the comma is explicitly exempted.
     punct_tagged = lambda word: all(
         unicat(char).startswith("P") and char != "," for char in word)
     cleaned = filter(lambda t: not punct_tagged(t[0]), tagged_text)
     return list(cleaned)
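A minimal usage sketch of this method's core logic, assuming unicat is unicodedata.category (the sample tagged tokens are made up):

from unicodedata import category as unicat

tagged = [("Hello", "UH"), (",", ","), ("world", "NN"), ("!", ".")]
punct_tagged = lambda word: all(
    unicat(char).startswith("P") and char != "," for char in word)
print([t for t in tagged if not punct_tagged(t[0])])
# -> [('Hello', 'UH'), (',', ','), ('world', 'NN')]  (the comma survives, '!' is dropped)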
Example #2
def normalize(sent):
    """
    Removes punctuation from a tokenized/tagged sentence and
    lowercases words.
    """
    # Assumes: from unicodedata import category as unicat
    is_punct = lambda word: all(unicat(c).startswith('P') for c in word)
    sent = filter(lambda t: not is_punct(t[0]), sent)
    sent = map(lambda t: (t[0].lower(), t[1]), sent)
    return list(sent)
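A quick check of the function above on a hypothetical POS-tagged sentence, again assuming unicat is unicodedata.category:

from unicodedata import category as unicat

# With normalize() from Example #2 in scope:
print(normalize([("The", "DT"), ("cat", "NN"), (".", ".")]))
# -> [('the', 'DT'), ('cat', 'NN')]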
Example #3
 def normalize(self, sent):
     """
     Removes punctuation from a tokenized/tagged sentence and
     lowercases words.
     """
     # Assumes: from unicodedata import category as unicat
     is_punct = lambda word: all(unicat(char).startswith('P') for char in word)
     sent = filter(lambda t: not is_punct(t[0]), sent)
     sent = map(lambda t: (t[0].lower(), t[1]), sent)
     return list(sent)
Example #4
def normalize(sent):
    """
    Tokenizes a raw tweet, removes URLs and punctuation tokens,
    and lowercases words.
    """
    # Assumes: from unicodedata import category as unicat
    # Assumes: tweet_tokenizer = nltk.tokenize.TweetTokenizer()
    sent = tweet_tokenizer.tokenize(sent)
    sent = [x for x in sent if 'http' not in x]
    is_punct = lambda word: all(unicat(char).startswith('P') for char in word)
    # Tokens here are plain strings, so test the whole token rather than
    # only its first character.
    sent = filter(lambda t: not is_punct(t), sent)
    sent = map(lambda t: t.lower(), sent)
    return list(sent)
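A usage sketch for the tweet variant; TweetTokenizer is NLTK's tweet-aware tokenizer, and the sample tweet is invented:

from unicodedata import category as unicat
from nltk.tokenize import TweetTokenizer

tweet_tokenizer = TweetTokenizer()

# With normalize() from Example #4 in scope:
print(normalize("Nice day! See https://example.com :)"))
# -> ['nice', 'day', 'see']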
Example #5
 def normalize(self, sent):
     """
     Removes punctuation from a tokenized/tagged sentence and
     lowercases words; returns an empty list unless exactly two
     tokens remain after filtering.
     """
     # Assumes: from unicodedata import category as unicat
     is_punct = lambda word: all(unicat(char).startswith('P') for char in word)
     sent = filter(lambda t: not is_punct(t[0]), sent)
     sent = list(sent)
     if len(sent) == 2:
         # Lowercase the word in each remaining (word, tag) pair.
         sent = list(map(lambda t: (t[0].lower(), t[1]), sent))
     else:
         sent = list()
     return sent
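A standalone sketch of the length gate above (the tagged inputs are hypothetical): only sentences with exactly two non-punctuation tokens come back lowercased; everything else collapses to an empty list.

from unicodedata import category as unicat

tagged = [("Hi", "UH"), ("there", "RB"), ("!", ".")]
is_punct = lambda word: all(unicat(char).startswith('P') for char in word)
kept = [t for t in tagged if not is_punct(t[0])]
# Lowercase only when exactly two tokens remain, as in the method above.
result = [(w.lower(), tag) for w, tag in kept] if len(kept) == 2 else []
print(result)  # -> [('hi', 'UH'), ('there', 'RB')]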