# Load the per-sequence video and audio feature tables, trim the modality
# suffix from each 'Sequence' id, and reduce the video momentum features to
# five principal components.
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize

# --- Video: momentum features ------------------------------------------------
# '§' is more than one byte once UTF-8 encoded, which pandas' C parser does not
# support; requesting the python engine explicitly avoids the silent fallback
# and its ParserWarning.
momentum = pd.read_csv('features/video/df_momentum.csv', sep='§',
                       engine='python')
momentum.rename(columns={'Unnamed: 0': 'Sequence'}, inplace=True)
# Cut the last len('_VIDEO') characters from every id (presumably the
# '_VIDEO' suffix, so ids line up with the audio tables -- confirm).
momentum['Sequence'] = momentum['Sequence'].apply(lambda x: x[:-len('_VIDEO')])
assert len(momentum) == 308
momentum.sort_values('Sequence', inplace=True)

# Row-normalise the numeric columns, then project onto 5 components.
# The result is built on momentum's own index: after the in-place sort the
# index labels are no longer 0..n-1 in order, and pd.concat(axis=1) aligns on
# labels, so a fresh RangeIndex would pair components with the wrong Sequence.
pca_momentum = pd.DataFrame(
    PCA(n_components=5).fit_transform(
        normalize(momentum.drop('Sequence', axis=1))),
    index=momentum.index)
pca_momentum = pca_momentum.add_prefix('Momentum_')
pca_momentum = pd.concat([momentum['Sequence'], pca_momentum], axis=1)
print(pca_momentum.head())

# --- Audio: descriptive statistics -------------------------------------------
audio_stat_desc = pd.read_csv('features/audio/Statistique_desc.csv', sep='§',
                              engine='python')
# Same trimming as above, here for the '_AUDIO'-length tail.
audio_stat_desc['Sequence'] = audio_stat_desc['Sequence'].apply(
    lambda x: x[:-len('_AUDIO')])
assert len(audio_stat_desc) == 308
audio_stat_desc.sort_values('Sequence', inplace=True)
print(audio_stat_desc.head())

# --- Audio: sound sequence segmentation --------------------------------------
decoupage_seq_son = pd.read_csv('features/audio/Decoupage_Sequence_son.csv',
                                sep='§', engine='python')
decoupage_seq_son['Sequence'] = decoupage_seq_son['Sequence'].apply(
    lambda x: x[:-len('_AUDIO')])
assert len(decoupage_seq_son) == 308
decoupage_seq_son.sort_values('Sequence', inplace=True)
print(decoupage_seq_son.head())
# Cluster the per-sequence text emotion features with K-means (k=3) and show
# the clusters on a 2-component PCA projection of the same features.
from sklearn.cluster import KMeans  # was missing: KMeans raised NameError
from sklearn.decomposition import PCA
import matplotlib as mpl
# The backend is chosen before pyplot is imported; keep this ordering.
mpl.use('TkAgg')  # or whatever other backend that you want
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()  # for plot styling
import warnings
warnings.filterwarnings('ignore')
# NOTE: this imports from a module literally named `plt` on the import path;
# it does not rebind the `plt` pyplot alias defined above.
from plt import plot_cluster

# '§' is multi-byte in UTF-8, so pandas needs the python parser for it.
data = pd.read_csv(
    '/home/mickael/Documents/Challenge_Video_Audio_Text/features/text/emotion_doc.csv',
    sep='§', engine='python')

# Everything except the 'Sequence' id column is fed to both PCA and K-means.
features = data.drop(['Sequence'], axis='columns')

# 2-D projection, used only for plotting.
pca = pd.DataFrame(PCA(n_components=2).fit_transform(features))
pca = pca.add_prefix('PCA_')

# Clustering is fitted on the full feature set, not on the projection.
model = KMeans(n_clusters=3, random_state=42, n_init=30)
cluster = model.fit_predict(features)

plt.scatter(pca['PCA_0'], pca['PCA_1'], c=cluster, s=50, cmap='viridis')
plt.show()

plot_cluster(
    pca[['PCA_0', 'PCA_1']].values,
    data['Sequence'],
    cluster,
    '/home/mickael/Documents/Challenge_Video_Audio_Text/result/plot_cluster_sentiments.html'
)
# NOTE(review): disabled static export kept for reference; `f` is not defined
# anywhere in this script, so this line cannot be re-enabled as written.
#f.savefig("/home/mickael/Documents/Challenge_Video_Audio_Text/result/kmeans_sentiments.png", bbox_inches='tight')