def showCorrelation(gripperjack_nr, part):
    """Plot the correlation of every feature with the ``to_be_predicted`` column.

    Reads the full Gelredome CSV, hands it to ``DynamicCsvConverter`` together
    with ``gripperjack_nr``, ``part`` and the fixed arguments ``'5min'`` and
    ``'max'``, and uses the frame returned by ``make_file()``. The
    ``Timestamp`` column and every row containing a missing value are removed
    before a yellowbrick ``FeatureCorrelation`` visualizer is fitted on the
    remaining columns against ``to_be_predicted``.

    Args:
        gripperjack_nr: Passed unchanged to ``DynamicCsvConverter``.
        part: Passed to ``DynamicCsvConverter`` and used as the figure title.

    Returns:
        None. The fitted ``scores_`` are appended to the module-level
        ``values`` collection and the figure is displayed.
    """
    raw = pd.read_csv(
        'C:\\Users\\Lukassen\\PycharmProjects\\GelredomeVeldErrorVoorspellen\\Recources\\Volledige_Gelredome_Data_CSV.csv',
        index_col=False)
    data = DynamicCsvConverter(gripperjack_nr, part, '5min', 'max', raw).make_file()

    # NOTE: 'to_be_predicted' must stay out of the dropped columns because it
    # is popped below and used as the correlation target.
    data = data.drop(columns=['Timestamp']).dropna()
    y = data.pop('to_be_predicted')
    X = data

    # Feature names are taken after the pop, so the target is not among them.
    features = np.array(data.columns)

    # Instantiate the visualizer, fit it, record the scores and render it.
    visualizer = FeatureCorrelation(labels=features, size=(1200, 700))
    visualizer.title = part
    visualizer.fit(X, y)
    values.append(visualizer.scores_)
    visualizer.show()
from sklearn import datasets
from yellowbrick.target import FeatureCorrelation

# Load the regression dataset and pull out the feature matrix and the target.
data = datasets.load_diabetes()
X = data['data']
y = data['target']

# Array of the feature names shipped with the dataset.
features = np.array(data['feature_names'])

# Instantiate the visualizer, fit it on the data, then render the figure.
visualizer = FeatureCorrelation(labels=features)
visualizer.fit(X, y)
visualizer.show()

## PCA - Principal Component Analysis
## https://www.kaggle.com/ryanholbrook/principal-component-analysis
from sklearn.decomposition import PCA

# Create principal components.
# NOTE(review): X_scaled is not defined in this snippet; it has to be
# provided by code outside of it.
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Convert to a dataframe whose columns are named PC1, PC2, ...
n_components = X_pca.shape[1]
component_names = [f"PC{i+1}" for i in range(n_components)]
X_pca = pd.DataFrame(X_pca, columns=component_names)

## Target Encoding
from category_encoders import MEstimateEncoder
from yellowbrick.target import FeatureCorrelation

# Plot styling shared by the figures below.
# NOTE(review): `rc` is defined outside this snippet; presumably it is
# matplotlib's rcParams mapping — confirm.
rc['xtick.labelsize'] = 15.0
rc['ytick.labelsize'] = 15.0
rc['xtick.direction'] = 'out'
rc['axes.labelsize'] = 15.0
rc['axes.titlesize'] = 18.0
rc['savefig.format'] = 'png'
rc['savefig.dpi'] = 600
rc['legend.fontsize'] = 15

# Separate the predictors from the 'Death_Event' target column.
x = df.drop('Death_Event', axis=1)
y = df['Death_Event']

# Pearson correlation of every feature with the target, sorted.
# `labels` is the keyword FeatureCorrelation uses for the feature names.
fig = plt.figure(figsize=(8, 6))
corr = FeatureCorrelation(method='pearson', labels=x.columns, sort=True).fit(x, y)
plt.savefig('../../outputs/visuals/correlations')
corr.show()

# Heatmap of the pairwise correlations between all columns of the frame.
fig, ax = plt.subplots(figsize=(20, 10))
sns.heatmap(df.corr(), annot=True, square=False, ax=ax)
ax.set_title('Correlations between features')
plt.savefig('../../outputs/visuals/correlations_all')
plt.show()

# Age distribution of Patients
fig, ax = plt.subplots(figsize=(8, 6))
sns.kdeplot(df['Age'], legend=False, shade=True, ax=ax)
# The title is set before saving so that the exported image includes it,
# matching the heatmap section above.
ax.set_title('Age Distribution of Patients')
plt.savefig('../../outputs/visuals/age_distribution')
plt.show()
plt.figure()

# Feature-correlation figure: build and fit the visualizer.
visualizerFC = FeatureCorrelation(labels=features, color="rebeccapurple", title=' ')
visualizerFC.fit(X, y)

# Output file name is assembled from the symbol, the indices and the label name.
locationFileNameFC = os.path.join(
    '/home/ak/Documents/Research/Papers/figures',
    ''.join((
        str(symbols[symbolIdx]),
        '_idx_', str(idx),
        '_label_', str(labelName),
        '_date_', str(dateIdx),
        '_label_', str(labelsIdx),
        '_FeatureCorrelation_w_depn_var.png',
    )),
)

# Axis cosmetics are applied before the visualizer writes the figure to disk.
plt.xlabel('', fontsize=11)
plt.xticks(fontsize=14)
plt.yticks(fontsize=12)
visualizerFC.show(outpath=locationFileNameFC)
plt.show()

# RadViz figure: switch palette, then build and fit the visualizer.
set_palette('yellowbrick')
plt.figure()
classes = np.array([0.0, 1.0])
plt.xticks(fontsize=9)
visualizerRadViz = RadViz(classes=classes, features=features, title=' ')
visualizerRadViz.fit(X, y)  # Fit the data to the visualizer
visualizerRadViz.transform(X)  # Transform the data

# Output file name for the RadViz figure.
locationFileNameRVZ = os.path.join(
    '/home/ak/Documents/Research/Papers/figures',
    ''.join((
        str(symbols[symbolIdx]),
        '_idx_', str(idx),
        '_label_', str(labelsIdx),
        '_date_', str(dateIdx),
        '_radviz.png',
    )),
)