def vector_of_language(source_file):
    """Build the n-gram probability vector for a text file read in one go.

    The whole file is read into memory, n-grams of order 1, 2 and 3 are
    counted with ``ngrams.count_ngrams``, and each count table is passed to
    the matching ``ngrams`` probability function.

    NOTE(review): another definition named ``vector_of_language`` is visible
    in this source; if both live in the same module the later one shadows
    this one -- confirm which is intended.

    Args:
        source_file: Path of the UTF-8 encoded text file to analyse.

    Returns:
        A three-element list ``[unigram, bigram, trigram]`` holding the
        results of ``ngrams.probability``, ``ngrams.probability_of_bigram``
        and ``ngrams.probability_of_trigram`` respectively.
    """
    # The context manager closes the file even if read() raises; the
    # original left the handle open for the garbage collector.
    with open(source_file, encoding="utf-8") as opened_file:
        text = opened_file.read()

    unigram_probability = ngrams.probability(ngrams.count_ngrams(text, 1))
    bigram_probability = ngrams.probability_of_bigram(
        ngrams.count_ngrams(text, 2)
    )
    trigram_probability = ngrams.probability_of_trigram(
        ngrams.count_ngrams(text, 3)
    )
    return [unigram_probability, bigram_probability, trigram_probability]
def vector_of_language(source_file):
    """Build the n-gram probability vector for a text file, line by line.

    The file is streamed one line at a time. For every line the n-grams of
    order 1, 2 and 3 are counted with ``ngrams.count_ngrams`` and added to a
    running total per order, so n-grams never span a line boundary. The
    totals are then passed to the matching ``ngrams`` probability function.

    NOTE(review): another definition named ``vector_of_language`` is visible
    in this source; if both live in the same module the later one wins --
    confirm which is intended.

    Args:
        source_file: Path of the UTF-8 encoded text file to analyse.

    Returns:
        A three-element list ``[unigram, bigram, trigram]`` holding the
        results of ``ngrams.probability``, ``ngrams.probability_of_bigram``
        and ``ngrams.probability_of_trigram`` respectively.
    """
    # One running total per n-gram order; dicts keep insertion order, so the
    # per-line counting below runs for order 1, then 2, then 3.
    totals = {order: collections.Counter() for order in (1, 2, 3)}

    # The context manager closes the file even if counting raises.
    with open(source_file, encoding="utf-8") as opened_file:
        for line in opened_file:
            for order, running_total in totals.items():
                # dict(...) mirrors the original conversion, so the result
                # of count_ngrams is treated as a mapping (or key/count
                # pairs) and not as an iterable of elements to tally.
                # update() adds in place instead of rebuilding the whole
                # accumulated counter for every line.
                # NOTE(review): unlike Counter addition, update() keeps
                # non-positive counts; assumes count_ngrams only yields
                # positive counts -- confirm.
                running_total.update(dict(ngrams.count_ngrams(line, order)))

    unigram_probability = ngrams.probability(totals[1])
    bigram_probability = ngrams.probability_of_bigram(totals[2])
    trigram_probability = ngrams.probability_of_trigram(totals[3])
    return [unigram_probability, bigram_probability, trigram_probability]