예제 #1
0
def wrangle_mall_data():
    """
    Acquire the mall data, run the prep step, and scale the resulting splits.

    Returns a 6-tuple:
    (train, validate, test, train_scaled, validate_scaled, test_scaled).
    """
    raw = acquire.get_mall_data()
    # NOTE(review): the prep result is unpacked as (train, test, validate),
    # while everything below uses (train, validate, test) order -- confirm the
    # order prepare.prep_mall_data actually returns.
    train, test, validate = prepare.prep_mall_data(raw)
    splits = (train, validate, test)
    train_scaled, validate_scaled, test_scaled = scale_mall(*splits)
    return splits + (train_scaled, validate_scaled, test_scaled)
def wrangle_mall_clustering():
    """
    Build scaled train/validate/test splits of the mall data for clustering.

    Steps: acquire the data, pass the 'gender' column to prepare.encode_label,
    split with pct=0.15, then scale the listed columns with prepare.scale.
    Only the three scaled splits are returned.
    """
    encoded = prepare.encode_label(acquire.get_mall_data(), ['gender'])
    train, validate, test = prepare.split_my_data(encoded, pct=0.15)
    columns_to_scale = ['age', 'annual_income', 'gender_Male']
    scaled = prepare.scale(train, validate, test, columns_to_scale)
    train_scaled, validate_scaled, test_scaled = scaled
    return train_scaled, validate_scaled, test_scaled
import os
from acquire import get_mall_data
import scipy as sp
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, QuantileTransformer, PowerTransformer, RobustScaler, MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from numpy import array
from numpy import argmax

# Acquire data from mall_customers.customers in mysql database.

# In[20]:

# Load the mall customer data via the project's acquire helper
# (presumably a pandas DataFrame -- it is used with .info() and .gender below).
df = get_mall_data()

df.info()

# In[21]:

# Preview the dummy encoding of the gender column.
# drop_first is a boolean flag, so pass True rather than a list.
pd.get_dummies(df.gender, dummy_na=False, drop_first=True)

# In[22]:


def clean_mall(df):
    """
    Append dummy-encoded gender columns to the mall data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``gender`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns followed by the gender dummy
        columns (first category level dropped). The input frame is not
        modified.
    """
    # drop_first is a boolean flag, so pass True rather than a list.
    dummy_df = pd.get_dummies(df.gender, dummy_na=False, drop_first=True)
    # concat builds a new frame; return it so the caller actually receives the
    # encoded columns instead of the result being discarded.
    return pd.concat([df, dummy_df], axis=1)
예제 #4
0
# # Acquire

# In[1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import env
import os

import acquire

# In[2]:

# Load the mall data through the project's acquire module.
df = acquire.get_mall_data()

# # Summarize

# In[3]:

# The bare expressions below are notebook-style cells: a notebook displays
# their value, while a plain script run evaluates and discards it.
df.shape

# In[4]:

# First rows of the data (assumes df is a pandas DataFrame -- TODO confirm).
df.head()

# In[5]:

# Per-column data types.
df.dtypes