Ejemplo n.º 1
0
import matplotlib.pyplot as plt
import seaborn as sns

from greyatomlib import pandas as pd

data = pd.read_csv('data/house_prices_multivariate.csv')


def plot(num_cols):
    """Draw a histogram for each listed column of the module-level ``data``.

    Columns are drawn two per figure, side by side. When the number of
    columns is odd, the last column is drawn alone in a figure of its own.

    Args:
        num_cols: Indexable, sized collection of column labels of ``data``.
    """
    for i in range(0, len(num_cols), 2):
        if i + 1 < len(num_cols):
            plt.figure(figsize=(10, 4))
            plt.subplot(121)
            sns.distplot(data[num_cols[i]], kde=False)
            plt.subplot(122)
            sns.distplot(data[num_cols[i + 1]], kde=False)
            plt.tight_layout()
        else:
            # Leftover column of an odd-length list: open an explicit figure
            # so the histogram is not drawn onto whatever figure is current.
            plt.figure()
            sns.distplot(data[num_cols[i]], kde=False)
        # Show in both branches; previously the unpaired plot was never shown.
        plt.show()
Ejemplo n.º 2
0
import scipy.stats as stats

from greyatomlib import pandas as pd

df = pd.read_csv('data/house_pricing.csv')


def chi2_test(df=df):
    """Chi-square test of independence between ``LandSlope`` and price tercile.

    ``SalePrice`` is cut into three quantile bins and cross-tabulated against
    ``LandSlope``; the resulting contingency table is passed to
    ``scipy.stats.chi2_contingency``.

    Args:
        df: DataFrame with ``SalePrice`` and ``LandSlope`` columns. The
            default is the module-level ``df`` as bound when this function
            was defined.

    Returns:
        A tuple ``(pval, pval < 0.05)``: the p-value of the test and whether
        it falls below the 0.05 threshold.
    """
    # qcut hands out labels to bins in ascending order, so the cheapest
    # tercile must be 'Low'. The counts (and p-value) are unaffected by the
    # label names; only the column headings of the table change.
    price = pd.qcut(df['SalePrice'], 3, labels=['Low', 'Medium', 'High'])
    freqtab = pd.crosstab(df['LandSlope'], price)
    _, pval, _, _ = stats.chi2_contingency(freqtab)
    return pval, pval < 0.05
Ejemplo n.º 3
0
# Default Import
from greyatomlib import pandas as pd

dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv')
house_price = dataframe_1.loc[:, 'SalePrice']

# Keep the full frame here. Selecting 'SalePrice' at read time would leave a
# Series, on which the column lookups below (``.loc[:, 'SalePrice']`` and
# ``.SalePrice``) fail.
dataframe_2 = pd.read_csv('data/house_prices_copy.csv')
weight_of_nasa_space_shuttle = dataframe_2.loc[:, 'SalePrice']


def spearman_correlation():
    """Return the Spearman correlation of the two frames' ``SalePrice`` columns."""
    return dataframe_1.SalePrice.corr(dataframe_2.SalePrice, method='spearman')


print(spearman_correlation())
Ejemplo n.º 4
0
def read_csv_data_to_df(path):
    """Read the CSV at ``path`` with ``pd.read_csv`` defaults and return it."""
    frame = pd.read_csv(path)
    return frame
Ejemplo n.º 5
0
def load_data(path):
    """Read the CSV at ``path``, using its first column as the row index."""
    return pd.read_csv(path, index_col=0)