def perform_cluster(data, params):
    """Group items by the KMeans cluster assigned to their TF-IDF vectors.

    Args:
        data: Indexable pair. ``data[1]`` holds the text documents that are
            vectorized and clustered; ``data[0]`` holds the items that are
            returned, looked up by the same position as their document.
        params: Mapping of keyword parameters applied to the ``KMeans``
            estimator through ``set_params``.

    Returns:
        A dict mapping each cluster label returned by ``fit_predict`` to the
        list of ``data[0]`` items assigned to that cluster, in input order.
        An empty dict is returned when there are no documents.
    """
    # NOTE(review): KMeans / TfidfVectorizer are presumably scikit-learn's,
    # imported elsewhere in this file -- confirm.
    documents = data[1]

    # Nothing to cluster: return early instead of failing inside the
    # vectorizer. len() is used rather than truthiness so array-like
    # containers are handled as well.
    if len(documents) == 0:
        return {}

    km = KMeans()
    km.set_params(**params)

    vectorizer = TfidfVectorizer()
    tfidf = vectorizer.fit_transform(documents)
    labels = km.fit_predict(tfidf)

    # Pre-seed one bucket per distinct label so the result is a plain dict
    # whose keys are exactly the labels that were produced.
    result = {label: [] for label in set(labels)}
    for index, label in enumerate(labels):
        result[label].append(data[0][index])
    return result