def wrangle_mall_data():
    """
    Acquire the mall data, hand it to the prep step, and scale the splits.

    The raw frame comes from acquire.get_mall_data(); prepare.prep_mall_data
    is expected to hand back the three splits, which are then passed to
    scale_mall.

    Returns a 6-tuple:
        train, validate, test, train_scaled, validate_scaled, test_scaled
    """
    raw_mall = acquire.get_mall_data()

    # NOTE(review): the prep result is unpacked as (train, test, validate),
    # while every later use follows (train, validate, test) order -- confirm
    # the order prepare.prep_mall_data actually returns.
    train, test, validate = prepare.prep_mall_data(raw_mall)

    scaled_splits = scale_mall(train, validate, test)
    train_scaled, validate_scaled, test_scaled = scaled_splits

    return (
        train,
        validate,
        test,
        train_scaled,
        validate_scaled,
        test_scaled,
    )
def wrangle_mall_clustering():
    """
    Produce scaled train/validate/test splits of the mall data.

    Pipeline, in order:
        1. acquire.get_mall_data() loads the data.
        2. prepare.encode_label(...) is applied for the 'gender' column.
        3. prepare.split_my_data(..., pct=0.15) yields train, validate, test.
        4. prepare.scale(...) is applied to the columns 'age',
           'annual_income' and 'gender_Male'.

    Returns:
        train_scaled, validate_scaled, test_scaled -- only the scaled splits;
        the unscaled ones are not returned.
    """
    columns_to_scale = ['age', 'annual_income', 'gender_Male']

    mall_raw = acquire.get_mall_data()
    mall_encoded = prepare.encode_label(mall_raw, ['gender'])

    train, validate, test = prepare.split_my_data(mall_encoded, pct=0.15)

    scaled_splits = prepare.scale(train, validate, test, columns_to_scale)
    train_scaled, validate_scaled, test_scaled = scaled_splits
    return train_scaled, validate_scaled, test_scaled
import os

# pandas is used below as `pd`; imported here so this script is runnable on
# its own (a repeated import is harmless if it is also imported earlier).
import pandas as pd
import scipy as sp
from numpy import array
from numpy import argmax
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, QuantileTransformer, PowerTransformer, RobustScaler, MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from acquire import get_mall_data

# Acquire data from mall_customers.customers in mysql database.

# In[20]:

df = get_mall_data()
df.info()


# In[21]:

# Exploratory preview of the gender dummy columns. This is a notebook cell:
# when run as a plain script the result is computed and discarded.
pd.get_dummies(df.gender, dummy_na=False, drop_first=True)


# In[22]:

def clean_mall(df):
    """
    Return `df` with dummy (indicator) columns for `gender` appended.

    Args:
        df: DataFrame exposing a `gender` column (read as `df.gender`).

    Returns:
        A new DataFrame: the columns of `df` followed by the dummy columns
        built from `df.gender`. The first category level is dropped
        (drop_first=True) and no NaN indicator column is added
        (dummy_na=False).
    """
    # drop_first is documented as a single bool; pass True explicitly rather
    # than relying on the truthiness of a list.
    dummy_df = pd.get_dummies(df.gender, dummy_na=False, drop_first=True)

    # Hand the combined frame back to the caller; rebinding the local name
    # `df` alone would leave the caller with nothing.
    return pd.concat([df, dummy_df], axis=1)
# # Acquire

# In[1]:

# Standard library
import os

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Project-local modules
import env
import acquire


# In[2]:

# Load the mall data through the project's acquire module.
df = acquire.get_mall_data()


# # Summarize

# In[3]:

# The cells below are notebook-style inspections: each expression is shown
# when run in a notebook and has no visible effect when run as a script.
df.shape


# In[4]:

df.head()


# In[5]:

df.dtypes