def get_vocab(corpus):
    """Count how often each token occurs across ``corpus``.

    Each sentence is passed through ``clean_str`` (defined elsewhere;
    presumably a text normaliser -- confirm against its definition) and
    the result is split on whitespace to obtain tokens.

    Args:
        corpus: Iterable of sentences accepted by ``clean_str``.

    Returns:
        A ``defaultdict(float)`` mapping token -> occurrence count. Counts
        are floats because of the ``float`` default factory.

    Side effects:
        Prints the number of distinct tokens to stdout.
    """
    counts = defaultdict(float)
    # Lazily flatten the corpus into one token stream; sentences are still
    # cleaned one at a time, in order, as the stream is consumed.
    token_stream = (
        token
        for sentence in corpus
        for token in clean_str(sentence).split()
    )
    for token in token_stream:
        counts[token] += 1
    print(len(counts))
    return counts
Esempio n. 2
0
def get_vocab(corpus):
    """Build a token-frequency table for ``corpus``.

    Every sentence is run through ``clean_str`` (defined elsewhere;
    presumably normalises the text -- verify against its definition) and
    then split on whitespace; each resulting token's tally is incremented.

    Args:
        corpus: Iterable of sentences accepted by ``clean_str``.

    Returns:
        A ``defaultdict(float)`` keyed by token whose values are the
        (float) number of times the token was seen.

    Side effects:
        Prints the vocabulary size (number of distinct tokens) to stdout.
    """
    frequencies = defaultdict(float)
    for sentence in corpus:
        tokens = clean_str(sentence).split()
        for token in tokens:
            # Unseen tokens start at 0.0 via the float default factory.
            frequencies[token] = frequencies[token] + 1
    vocab_size = len(frequencies)
    print(vocab_size)
    return frequencies
def process(corpus):
    """Apply ``clean_str`` to every sentence of ``corpus``.

    Args:
        corpus: Iterable of sentences accepted by ``clean_str`` (defined
            elsewhere in this file).

    Returns:
        A new list holding the ``clean_str`` result for each sentence, in
        the same order as ``corpus``.
    """
    cleaned = []
    for sentence in corpus:
        cleaned.append(clean_str(sentence))
    return cleaned
Esempio n. 4
0
def process(corpus):
    """Return the sentences of ``corpus`` after passing each through ``clean_str``.

    Args:
        corpus: Iterable of sentences accepted by ``clean_str`` (defined
            elsewhere in this file).

    Returns:
        A freshly built list of ``clean_str`` outputs, one per input
        sentence, preserving input order.
    """
    results = []
    results.extend(clean_str(item) for item in corpus)
    return results