print('#' * 40)
    print('### TOP %i DUPLICATED ITEM:' % (k + 1), groups.index[k])
    print('#' * 40)
    top = ds.df.loc[ds.df.label_group == groups.index[k]]
    displayDF(top, random=False, ROWS=2, COLS=4)

# Turn every product title into a TF-IDF feature vector.
# English stop words are dropped; binary=True makes the term-frequency part
# 0/1 (term present or absent) before the idf weighting is applied.
model = TfidfVectorizer(stop_words='english', binary=True)
title_term_matrix = model.fit_transform(ds.df.title)
# NOTE: densifying gives an (n_titles, vocabulary_size) array, which can be
# large in memory for big vocabularies.
text_embeddings = title_term_matrix.toarray()
print('text embeddings shape is', text_embeddings.shape)

from sklearn.neighbors import NearestNeighbors

# Number of neighbours retrieved for every title.
KNN = 50

# Build the neighbour index over the title embeddings (fit returns the
# estimator itself, so construction and fitting are chained).
model = NearestNeighbors(n_neighbors=KNN).fit(text_embeddings)

# Query with the same points the index was built from: row i of `distances`
# and `indices` holds the KNN closest titles to title i. Because the query
# set equals the fitted set, a title typically appears among its own
# neighbours at distance 0.
distances, indices = model.kneighbors(text_embeddings)

# Persist the k-NN query results (neighbour distances and neighbour row
# indices) to disk as .npy files.
knn_model_folder = "./models/knn"
# np.save does not create missing parent directories; without this the script
# fails with FileNotFoundError only after the expensive neighbour search.
os.makedirs(knn_model_folder, exist_ok=True)
# NOTE: the "50" in the file names is hard-coded; keep it in sync with KNN
# if the neighbour count is ever changed.
np.save(os.path.join(knn_model_folder, "distances_50_centroids.npy"),
        distances)
np.save(os.path.join(knn_model_folder, "indices_50_centroids.npy"), indices)

###############  Image training (PyTorch)

# configs:

# Training dataset for the image model, built with ShopeeTorch's default
# constructor arguments.
torch_train_dataset = ShopeeTorch()

# Number of classes in the dataset, as reported by the dataset's n_labels().
# NOTE(review): presumably the count of distinct label groups — confirm
# against ShopeeTorch.n_labels.
num_classes = torch_train_dataset.n_labels()