def mall_cluster():
    """Cluster mall customers with k-means and show them in a 3-D scatter.

    Loads the ``customers`` table of the ``mall_customers`` database,
    recodes ``gender`` to 0 for ``'Female'`` and 1 for anything else, fits a
    five-cluster k-means model on ``annual_income`` and ``spending_score``,
    prints the data and the cluster centers, and finally displays the
    customers (coloured by cluster) together with the cluster centers on
    age / annual_income / spending_score axes.
    """
    data = db.get_db_url(comm='SELECT * FROM customers',
                         database='mall_customers')
    data['gender'] = data['gender'].apply(lambda x: 0 if x == 'Female' else 1)
    print(data)

    features = data[['annual_income', 'spending_score']]
    kmeans = KMeans(n_clusters=5)
    kmeans.fit(features)

    # Separate one-dimensional clustering on age, printed for comparison.
    # The keyword is ``n_clusters``; the misspelled ``n_cluster`` raised a
    # TypeError before anything was plotted.
    print(KMeans(n_clusters=5).fit(data[['age']]).cluster_centers_)
    print(kmeans.cluster_centers_)

    centers = pd.DataFrame(kmeans.cluster_centers_, columns=features.columns)
    # The model was fitted without ``age``, so the centers have no age
    # coordinate of their own. Use the mean age of each cluster's members,
    # aligned on the cluster label, so the centers can be placed on the
    # age axis of the plot.
    centers['age'] = data.groupby(kmeans.labels_)['age'].mean()

    fig = plt.figure(figsize=(12, 9))
    # NOTE(review): axes built with ``Axes3D(fig)`` are not attached to the
    # figure automatically in recent Matplotlib releases, which leaves the
    # window empty; ``add_subplot`` creates and attaches the 3-D axes.
    ax = fig.add_subplot(111, projection='3d')

    ax.scatter(data.age,
               data.annual_income,
               data.spending_score,
               c=kmeans.labels_)
    # Large translucent markers make the centers stand out from the points.
    ax.scatter(centers.age,
               centers.annual_income,
               centers.spending_score,
               c='pink',
               s=10000,
               alpha=.4)
    ax.set(xlabel='age', ylabel='annual_income', zlabel='spending_score')
    plt.show()
def wrangle_telco():
    """Load telco customers on contract type 3 with numeric total charges.

    Queries ``customer_id``, ``monthly_charges``, ``tenure`` and
    ``total_charges`` from the ``telco_churn`` database for rows where
    ``contract_type_id = 3``, then converts the textual ``total_charges``
    values to numbers. A value that does not start with a digit once the
    surrounding whitespace is removed (for example a blank string) becomes
    ``0.0``.

    Returns:
        The query result with ``total_charges`` converted.
    """
    cust = get_db_url(comm = """SELECT customer_id, monthly_charges, tenure, total_charges
                    FROM customers WHERE contract_type_id = 3
                    ORDER BY total_charges DESC;""",
                      database='telco_churn')

    def _to_charge(raw):
        """Convert one raw ``total_charges`` string to a float."""
        text = raw.strip()
        # ``text[:1]`` is '' for an empty string, so blank values fall
        # through to 0.0 instead of raising IndexError like ``text[0]``.
        return float(text) if text[:1].isdigit() else 0.0

    # The stripped values are used directly in the conversion; the original
    # computed them in a separate ``apply`` whose result was thrown away.
    cust['total_charges'] = cust['total_charges'].apply(_to_charge)

    # Return the frame that was built above (``telco`` was never defined).
    return cust
from dbtools import get_db_url
import env
import MySQLdb


def get_db_url(comm, database):
    """Run a SQL statement against the MySQL server and return the result.

    Args:
        comm: SQL statement to execute.
        database: Name of the database to connect to.

    Returns:
        Whatever ``psql.read_sql`` returns for ``comm``.
    """
    # ``user`` and ``passwd`` come from the local ``env`` module so that
    # credentials stay out of this file.
    conn = MySQLdb.connect(host='157.230.209.171', user=env.user,
                           passwd=env.password, db=database)
    try:
        return psql.read_sql(comm, con=conn)
    finally:
        # Release the connection even when the query raises.
        conn.close()





# Module-level query: customer id, monthly charges, tenure and total charges
# for rows with contract_type_id = 3 in the telco_churn database, ordered by
# total_charges descending. Runs as soon as this file is executed/imported.
cust = get_db_url(
    comm="""SELECT customer_id, monthly_charges, tenure, total_charges
                    FROM customers WHERE contract_type_id = 3
                    ORDER BY total_charges DESC;""",
    database='telco_churn',
)


def wrangle_telco():
    """Load telco customers on contract type 3 with numeric total charges.

    Queries ``customer_id``, ``monthly_charges``, ``tenure`` and
    ``total_charges`` from the ``telco_churn`` database for rows where
    ``contract_type_id = 3``, then converts the textual ``total_charges``
    values to numbers. A value that does not start with a digit once the
    surrounding whitespace is removed (for example a blank string) becomes
    ``0.0``.

    Returns:
        The query result with ``total_charges`` converted.
    """
    cust = get_db_url(comm = """SELECT customer_id, monthly_charges, tenure, total_charges
                    FROM customers WHERE contract_type_id = 3
                    ORDER BY total_charges DESC;""",
                      database='telco_churn')

    def _to_charge(raw):
        """Convert one raw ``total_charges`` string to a float."""
        text = raw.strip()
        # ``text[:1]`` is '' for an empty string, so blank values fall
        # through to 0.0 instead of raising IndexError like ``text[0]``.
        return float(text) if text[:1].isdigit() else 0.0

    # The stripped values are used directly in the conversion; the original
    # computed them in a separate ``apply`` whose result was thrown away.
    cust['total_charges'] = cust['total_charges'].apply(_to_charge)

    # Return the frame that was built above (``telco`` was never defined).
    return cust
def get_iris_data(command="""SELECT measurement_id,  sepal_length,  sepal_width
,petal_length, petal_width,  species.species_name FROM measurements
JOIN species USING(species_id);""",
                  database='iris_db'):
    """Fetch iris data by running ``command`` against ``database``.

    With the defaults, the query selects the measurement columns joined
    with the species name from the ``iris_db`` database.

    Args:
        command: SQL statement to run.
        database: Name of the database to query.

    Returns:
        The result of ``db.get_db_url`` for the given statement.
    """
    query_args = {'comm': command, 'database': database}
    return db.get_db_url(**query_args)
def get_titanic_data(command='SELECT * FROM passengers',
                     database='titanic_db'):
    """Fetch titanic data by running ``command`` against ``database``.

    With the defaults, every row of the ``passengers`` table in the
    ``titanic_db`` database is selected.

    Args:
        command: SQL statement to run.
        database: Name of the database to query.

    Returns:
        The result of ``db.get_db_url`` for the given statement.
    """
    # The original repeated this return on the next line; the copy was
    # unreachable and has been dropped.
    return db.get_db_url(comm=command, database=database)


def get_iris_data(command="""SELECT measurement_id,  sepal_length,  sepal_width
,petal_length, petal_width,  species.species_name FROM measurements
JOIN species USING(species_id);""",
                  database='iris_db'):
    """Fetch iris data by running ``command`` against ``database``.

    With the defaults, the query selects the measurement columns joined
    with the species name from the ``iris_db`` database.

    Args:
        command: SQL statement to run.
        database: Name of the database to query.

    Returns:
        The result of ``db.get_db_url`` for the given statement.
    """
    query_args = {'comm': command, 'database': database}
    return db.get_db_url(**query_args)


# Wait for a line of input before continuing; the typed text is kept in ``a``.
a = input('<<<<<>>>>>')

#error_me = me_error


# Load the whole ``measurements`` table of the iris database.
df_iris = pd.DataFrame(db.get_db_url('SELECT * FROM measurements',
                                     database='iris_db'))

# Quick look at the frame: first rows, dimensions, column names, summary.
print(df_iris.head(3))
print(df_iris.shape)
print(df_iris.columns)
# ``info()`` writes its report to stdout itself and returns None, so wrapping
# it in ``print`` only appended a stray "None" line to the output.
df_iris.info()
print(df_iris.describe())

# For every int64/float64 column, print its name followed by its range
# (max - min).
ints = df_iris.select_dtypes(include=['int64', 'float64'])
for i in ints:
    print(i)
    print(df_iris[i].max() - df_iris[i].min())

# Separator line between the iris output and the Excel section.
print('<<<<<>>>>>')

# Read the customer-details workbook and keep its first 100 rows as a sample.
df_excel = pd.DataFrame(
    pd.read_excel('mytable_customer_details.xlsx')
)
df_excel_sample = df_excel.iloc[:100]
    return_data['num_cols_missing'] = data.isnull().sum(axis=1)
    return_data['pct_cols_missing'] = (data.isnull().sum(axis=1)) / len(data)
    return_data['num_rows'] = '!'
    return return_data


# Pull up to 10,000 rows of properties_2017 joined with each parcel's
# prediction row carrying its latest transactiondate (inner subquery ``sq1``
# picks Max(transactiondate) per parcelid). Rows without latitude/longitude
# and the listed propertylandusetypeid values are excluded.
zillow_data = db.get_db_url(comm="""Select * From properties_2017
Join (SELECT
p_17.parcelid,
logerror,
transactiondate
FROM predictions_2017 p_17
JOIN 
(SELECT
  parcelid, Max(transactiondate) as tdate
FROM
  predictions_2017
 
Group By parcelid )as sq1
ON (sq1.parcelid=p_17.parcelid and sq1.tdate = p_17.transactiondate )) sq2
USING (parcelid)
WHERE (latitude IS NOT NULL AND longitude IS NOT NULL)
AND properties_2017.propertylandusetypeid NOT IN (31, 47,246, 247, 248, 267, 290, 291)
LIMIT 10000;""",
                            database='zillow')

print(zillow_data)
print('---------------|DATABASE_INFO|---------------')
# ``info()`` prints its own report and returns None; calling it directly
# avoids the extra "None" line that ``print(zillow_data.info())`` produced.
zillow_data.info()
print(zillow_data.describe())
print(zillow_data.shape)