Example #1
0
def showCorrelation(gripperjack_nr, part):
    """Plot how each feature correlates with the ``to_be_predicted`` column.

    The full CSV export is read, converted through ``DynamicCsvConverter``
    (5-minute / 'max' settings), stripped of the ``Timestamp`` column and of
    rows containing NaN, and then fed to a yellowbrick ``FeatureCorrelation``
    visualizer. The fitted scores are appended to the externally defined
    ``values`` collection and the figure is displayed.

    Args:
        gripperjack_nr: Forwarded unchanged as the first argument of
            ``DynamicCsvConverter``.
        part: Forwarded unchanged to ``DynamicCsvConverter``; also used as the
            title of the plot.
    """
    raw = pd.read_csv(
        'C:\\Users\\Lukassen\\PycharmProjects\\GelredomeVeldErrorVoorspellen\\Recources\\Volledige_Gelredome_Data_CSV.csv',
        index_col=False)
    data = DynamicCsvConverter(gripperjack_nr, part, '5min', 'max', raw).make_file()

    # 'Timestamp' is not used as a feature; incomplete rows are discarded
    # before the target column is split off.
    data = data.drop(columns=['Timestamp']).dropna()
    y = data.pop('to_be_predicted')
    X = data

    # Remaining column names become the bar labels of the plot
    features = np.array(X.columns)

    # Instantiate, fit and render the visualizer
    visualizer = FeatureCorrelation(labels=features, size=(1200, 700))
    visualizer.title = part
    visualizer.fit(X, y)
    values.append(visualizer.scores_)
    visualizer.show()
Example #2
0
from sklearn import datasets
from yellowbrick.target import FeatureCorrelation

# Load the diabetes regression dataset and split it into inputs and target
data = datasets.load_diabetes()
X = data['data']
y = data['target']

# Feature names, as an array, are used to label the plot
features = np.array(data['feature_names'])

# Build the visualizer, fit it on the data, then render the figure
visualizer = FeatureCorrelation(labels=features)
visualizer.fit(X, y)
visualizer.show()

## PCA - Principal Component Analysis https://www.kaggle.com/ryanholbrook/principal-component-analysis
from sklearn.decomposition import PCA

# Fit PCA on the scaled features and project them onto the components
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Wrap the projection in a DataFrame whose columns are named PC1, PC2, ...
component_names = ["PC{}".format(k) for k in range(1, X_pca.shape[1] + 1)]
X_pca = pd.DataFrame(X_pca, columns=component_names)


## Target Encoding
from category_encoders import MEstimateEncoder
from yellowbrick.target import FeatureCorrelation

# Plot styling overrides, applied one key at a time in the listed order.
# NOTE(review): `rc` is presumably matplotlib's rcParams mapping — confirm
# against the import that defines it.
for _rc_key, _rc_value in (
    ('xtick.labelsize', 15.0),
    ('ytick.labelsize', 15.0),
    ('xtick.direction', 'out'),
    ('axes.labelsize', 15.0),
    ('axes.titlesize', 18.0),
    ('savefig.format', 'png'),
    ('savefig.dpi', 600),
    ('legend.fontsize', 15),
):
    rc[_rc_key] = _rc_value

# Split the frame into predictors and the 'Death_Event' target
x = df.drop('Death_Event', axis=1)
y = df['Death_Event']

fig = plt.figure(figsize=(8, 6))
# FeatureCorrelation's documented keyword is `labels` (plural); `label` is
# not one of its parameters.
corr = FeatureCorrelation(method='pearson', labels=x.columns,
                          sort=True).fit(x, y)
# NOTE(review): the figure is written to disk before corr.show() runs, so the
# saved image may lack whatever show() adds (title / axis label) — confirm
# whether corr.show(outpath=...) is the intended way to save it.
plt.savefig('../../outputs/visuals/correlations')
corr.show()

# Annotated heatmap of the pairwise correlations between all columns of df;
# the figure is saved first and then displayed.
fig, ax = plt.subplots(figsize=(20, 10))
sns.heatmap(data=df.corr(), ax=ax, annot=True, square=False)
ax.set_title('Correlations between features')
plt.savefig('../../outputs/visuals/correlations_all')
plt.show()

# Age distribution of patients (kernel density estimate of the 'Age' column)
fig, ax = plt.subplots(figsize=(8, 6))
# NOTE(review): `shade` is a deprecated alias of `fill` in recent seaborn
# releases — switch once the minimum seaborn version is confirmed.
sns.kdeplot(df['Age'], legend=False, shade=True, ax=ax)
# The title has to be set before saving, otherwise the written image lacks it.
ax.set_title('Age Distribution of Patients')
plt.savefig('../../outputs/visuals/age_distribution')
                plt.show()

                plt.figure()

                # Feature-vs-target correlation plot with a blank title
                visualizerFC = FeatureCorrelation(
                    labels=features, color="rebeccapurple", title=' ')
                visualizerFC.fit(X, y)

                # Output file name encodes symbol, index, label name, date
                # index and label index (values are converted with str()).
                locationFileNameFC = os.path.join(
                    '/home/ak/Documents/Research/Papers/figures',
                    f"{symbols[symbolIdx]!s}_idx_{idx!s}_label_{labelName!s}"
                    f"_date_{dateIdx!s}_label_{labelsIdx!s}"
                    "_FeatureCorrelation_w_depn_var.png")

                # Axis cosmetics on the current figure, then write it to the
                # path above and call plt.show()
                plt.xlabel('', fontsize=11)
                plt.xticks(fontsize=14)
                plt.yticks(fontsize=12)
                visualizerFC.show(outpath=locationFileNameFC)
                plt.show()
                #
                # # Instantiate the visualizer

                # RadViz plot of the same features for the two classes 0 and 1
                set_palette('yellowbrick')
                plt.figure()
                classes = np.array([0.0, 1.0])
                plt.xticks(fontsize=9)
                visualizerRadViz = RadViz(
                    classes=classes, features=features, title=' ')
                visualizerRadViz.fit(X, y)
                visualizerRadViz.transform(X)

                # Output file name encodes symbol, index, label index and date
                # index (values are converted with str()).
                locationFileNameRVZ = os.path.join(
                    '/home/ak/Documents/Research/Papers/figures',
                    f"{symbols[symbolIdx]!s}_idx_{idx!s}_label_{labelsIdx!s}"
                    f"_date_{dateIdx!s}_radviz.png")