def corr_full(df):
    """Call ``ast.corr`` on ``df`` with every styling option set explicitly.

    Exercises the full keyword surface of ``ast.corr`` (method, annotation,
    palette, style, dpi, titles, axis labels, legend, scales and limits).
    Nothing is returned.
    """
    options = {
        'corr_method': 'pearson',
        'annot': True,
        'palette': 'colorblind',
        'style': 'fivethirtyeight',
        'dpi': 240,
        'title': 'This is a title',
        'sub_title': 'And this a subtitle',
        'x_label': 'this is x label',
        'y_label': 'and this y',
        'legend': False,
        'x_scale': 'log',
        'y_scale': 'symlog',
        'x_limit': 1,
        'y_limit': [1, 199],
    }
    ast.corr(df, **options)
def test_simple_minimal(df):
    """Call each ``ast`` plot once with the smallest useful argument set.

    ``df`` must provide the columns referenced below: 'A', 'B', 'C',
    'odd', 'even', 'other' and 'cats'. The calls run in a fixed order and
    their return values are ignored; the function passes if none raises.
    """
    # Single-variable and whole-frame plots.
    ast.corr(df)
    ast.kde(data=df, x='A')
    ast.hist(df, x='A')
    ast.pie(df, x='other')

    # Two-variable plots, optionally split by hue.
    ast.swarm(df, x='A', y='B', hue='even')
    ast.scat(df, x='A', y='B', hue='even')
    ast.line(df, x='A')
    ast.grid(df, x='A', y='B', col='even')

    # Categorical x-axis plots.
    ast.box(df, x='odd', y='A', hue='even')
    ast.violin(df, x='odd', y='A', hue='even')
    ast.strip(df, x='odd', y='B', hue='even')
    ast.count(df, x='cats')

    # Multi-panel and multi-series plots.
    ast.bargrid(df, x='even', y='B', hue='other', col='odd')
    ast.overlap(df, x='A', y='B', label_col='other')
    ast.multikde(df, x='A', label_col='even')
    ast.compare(df, x='A', y=['B', 'C'], label_col='other')
    ast.multicount(df, x='even', hue='odd', col='other')
def plot_corr(self, metric='val_acc', color_grades=5):
    '''Correlation heatmap of one metric against the hyperparameters.

    NOTE: remember to invoke %matplotlib inline if in notebook

    metric :: the metric to correlate against
    color_grades :: number of colors to use in heatmap

    Returns whatever `corr` returns for the selected columns.
    '''
    # Resolve the column selection first, then slice the stored data.
    selected_columns = self._cols(metric)
    subset = self.data[selected_columns]
    return corr(subset, color_grades=color_grades)
def _cooccurrence_table(data, row_col, col_col, levels):
    """Count rows of ``data`` for every (row level, column level) pair.

    Cell ``[i, j]`` is the number of rows where ``data[row_col]`` equals
    ``levels[i]`` AND ``data[col_col]`` equals ``levels[j]``.
    """
    table = np.zeros((len(levels), len(levels)), dtype=int)
    for i, row_level in enumerate(levels):
        row_mask = data[row_col] == row_level
        for j, col_level in enumerate(levels):
            # Element-wise AND: a row counts only when both conditions hold.
            # (Comparing the two masks with `==` would also count rows where
            # both conditions are False, which is not a co-occurrence count.)
            both = row_mask & (data[col_col] == col_level)
            table[i, j] = int(both.sum())
    return table


def check_correlation(newData):
    """Plot the feature correlation heatmap and test Vaccinated vs Dewormed.

    Draws a Pearson correlation heatmap of all columns of ``newData``, then
    runs a chi-squared test of independence between the 'Vaccinated' and
    'Dewormed' columns and prints the verdict at the 5% significance level.

    newData :: DataFrame with 'Vaccinated' and 'Dewormed' columns whose
               values are compared against the levels 1, 0 and 0.5

    Returns None; the result is reported with ``print``.
    '''Raises''' whatever ``chi2_contingency`` raises, e.g. ValueError when
    no row matches any level pair.
    """
    ast.corr(newData, corr_method='pearson',
             title="Correlation Heatmap of All Features")
    plt.show()

    # Rows follow Vaccinated, columns follow Dewormed, both ordered 1, 0, 0.5.
    levels = (1, 0, 0.5)
    contingency_table = _cooccurrence_table(
        newData, 'Vaccinated', 'Dewormed', levels)

    # A level that never occurs yields an all-zero row/column, which makes
    # an expected frequency zero and chi2_contingency raise. Unobserved
    # levels carry no information, so drop them before testing.
    observed = contingency_table[contingency_table.any(axis=1)]
    observed = observed[:, observed.any(axis=0)]

    _, p, _, _ = chi2_contingency(observed)

    # interpret p-value
    prob = 0.95
    alpha = 1.0 - prob
    if p <= alpha:
        print('Dependent (reject H0)')
    else:
        print('Independent (fail to reject H0)')
def plot_corr(self, metric, exclude, color_grades=5):
    '''Correlation heatmap of one metric against the hyperparameters.

    NOTE: remember to invoke %matplotlib inline if in notebook

    metric | str | Column label for the metric to correlate with
    exclude | list | Column label/s to be excluded from the correlation
    color_grades | int | Number of colors to use in heatmap

    Returns the result of `astetik.corr`, or None (after printing a
    message) when a RuntimeError is raised while importing or plotting.
    '''
    try:
        # Imported lazily so a RuntimeError raised on import is caught
        # by the handler below, like any raised while plotting.
        import astetik as ast

        selected_columns = self._cols(metric, exclude)
        heatmap = ast.corr(self.data[selected_columns],
                           color_grades=color_grades)
        return heatmap
    except RuntimeError:
        print('Matplotlib Runtime Error. Plots will not work.')
        return None
import pandas as pd import matplotlib.pyplot as plt from scipy.stats import chi2_contingency newData = np.load("data_without_outliers.npy", allow_pickle=True) newData = pd.DataFrame(newData, dtype=float) newData.columns = [ 'Type', 'Age', 'Breed1', 'Breed2', 'Gender', 'Color1', 'Color2', 'Color3', 'Maturity Size', 'Fur Length', 'Vaccinated', 'Dewormed', 'Sterilized', 'Health', 'Quantity', 'Fee', 'State', 'Video Amount', 'Photo Amount', 'Sentmt Magnitude', 'Sentmt Score', 'Adoption Speed' ] ast.corr(newData, corr_method='pearson', title="Correlation Heatmap of All Features") plt.show() contingency_table = np.zeros((3, 3), dtype=int) contingency_table[0, 0] = sum( (newData['Vaccinated'] == 1) == (newData['Dewormed'] == 1)) contingency_table[0, 1] = sum( (newData['Vaccinated'] == 1) == (newData['Dewormed'] == 0)) contingency_table[0, 2] = sum( (newData['Vaccinated'] == 1) == (newData['Dewormed'] == 0.5)) contingency_table[1, 0] = sum( (newData['Vaccinated'] == 0) == (newData['Dewormed'] == 1)) contingency_table[1, 1] = sum( (newData['Vaccinated'] == 0) == (newData['Dewormed'] == 0))