import matplotlib.pyplot as plt
import seaborn as sns
from greyatomlib import pandas as pd

data = pd.read_csv('data/house_prices_multivariate.csv')


def plot(num_cols):
    """Draw histograms of the given ``data`` columns, two per figure.

    Columns are taken pairwise in order: each pair is rendered side by side
    in a 10x4 figure (subplots 121 and 122), laid out with
    ``plt.tight_layout()`` and displayed with ``plt.show()``.

    When ``num_cols`` has an odd number of entries, the final column is drawn
    by a single ``sns.distplot`` call on the current axes; no new figure is
    created and ``plt.show()`` is not called for it.

    Args:
        num_cols: Indexable sequence of column names present in ``data``.
    """
    column_count = len(num_cols)
    for start in range(0, column_count, 2):
        # Unpaired trailing column: plot it alone and move on.
        if start + 1 >= column_count:
            sns.distplot(data[num_cols[start]], kde=False)
            continue

        plt.figure(figsize=(10, 4))
        # Left slot gets the first column of the pair, right slot the second.
        for slot, offset in ((121, 0), (122, 1)):
            plt.subplot(slot)
            sns.distplot(data[num_cols[start + offset]], kde=False)
        plt.tight_layout()
        plt.show()
import scipy.stats as stats
from greyatomlib import pandas as pd

df = pd.read_csv('data/house_pricing.csv')


def chi2_test(df=df):
    """Chi-square test of independence between LandSlope and price tier.

    ``SalePrice`` is split into three quantile-based tiers and
    cross-tabulated against ``LandSlope``; the resulting contingency table is
    passed to ``scipy.stats.chi2_contingency``.

    Args:
        df: DataFrame with ``SalePrice`` and ``LandSlope`` columns. Defaults
            to the module-level ``df`` as it was when this function was
            defined.

    Returns:
        A ``(pval, significant)`` tuple: the p-value of the test and the
        result of ``pval < 0.05``.
    """
    # qcut numbers its bins in ascending order, so the labels must run from
    # the cheapest tier to the most expensive one.
    price = pd.qcut(df['SalePrice'], 3, labels=['Low', 'Medium', 'High'])
    freqtab = pd.crosstab(df['LandSlope'], price)
    # Only the p-value is needed; statistic, dof and expected are discarded.
    _chi2, pval, _dof, _expected = stats.chi2_contingency(freqtab)
    return pval, pval < 0.05
# Default Import
from greyatomlib import pandas as pd

dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv')
house_price = dataframe_1.loc[:, 'SalePrice']

# Keep the whole frame here. Selecting 'SalePrice' at load time would turn
# dataframe_2 into a Series, and the column lookups below (which expect a
# DataFrame) would then fail.
dataframe_2 = pd.read_csv('data/house_prices_copy.csv')
# NOTE: despite its name, this holds the SalePrice column of the copy dataset.
weight_of_nasa_space_shuttle = dataframe_2.loc[:, 'SalePrice']


def spearman_correlation():
    """Return the Spearman correlation between the two SalePrice columns.

    Correlates ``SalePrice`` from ``dataframe_1`` with ``SalePrice`` from
    ``dataframe_2`` using ``Series.corr(method='spearman')``.
    """
    return dataframe_1.SalePrice.corr(dataframe_2.SalePrice, method='spearman')


print(spearman_correlation())
def read_csv_data_to_df(path):
    """Read the CSV at ``path`` with ``pd.read_csv`` and return the result.

    Args:
        path: Anything ``pd.read_csv`` accepts as its first argument.

    Returns:
        The object produced by ``pd.read_csv(path)`` with default options.
    """
    frame = pd.read_csv(path)
    return frame
def load_data(path):
    """Load a CSV, using its first column as the row index.

    Args:
        path: Anything ``pd.read_csv`` accepts as its first argument.

    Returns:
        The object produced by ``pd.read_csv(path, index_col=0)``.
    """
    return pd.read_csv(path, index_col=0)