def get_sum_vector(corpus, model):
    """Collapse each document of *corpus* into a single summed weight.

    Every document is looked up through ``model[doc]``; the element at
    index 1 of each item returned by that lookup is summed, and the total
    is wrapped in a one-element NumPy array.

    Args:
        corpus: Iterable of documents accepted by ``model[...]``
            (presumably bag-of-words vectors -- verify against caller).
        model: Object supporting ``model[doc]`` and returning an iterable
            of indexable items whose second element is numeric
            (presumably ``(term_id, weight)`` pairs -- TODO confirm).

    Returns:
        ``np.ndarray`` built from one single-element array per document,
        in corpus order.
    """
    # Use the builtin ``sum`` rather than a local accumulator named
    # ``sum``, which previously shadowed the builtin inside this function.
    totals = [
        np.array([sum(entry[1] for entry in model[doc])])
        for doc in corpus
    ]
    return np.array(totals)


# --- Pipeline: load text, build n-grams, weight them, then cluster. ---
document = Document('./wikitext-2-raw-v1/wikitext-2-raw/wiki.train.raw')
document.pre_process()
document.build_n_grams(2)

vocab = Dictionary(document.n_grams)
corpus = [vocab.doc2bow(line) for line in document.n_grams]  # convert corpus to BoW format
model = TfidfModel(corpus)

# NOTE(review): this calls ``get_vector``, while the helper defined above is
# ``get_sum_vector``. ``get_vector`` is not visible in this part of the file;
# confirm it is defined/imported elsewhere and that it is the intended call.
vector = get_vector(corpus=corpus, model=model)
print(vector)

km = KMeansRecommend(data=vector)
km.k_means()

title = "Life"