def get_data_from_zillow():
    """Load the joined 2017 property / prediction rows from the zillow database.

    The query inner-joins ``properties_2017`` with ``predictions_2017`` on
    ``parcelid`` and keeps only rows where ``propertylandusetypeid = 261`` and
    ``structuretaxvaluedollarcnt < 1000000``.

    Returns:
        Whatever ``pd.read_sql`` yields for the query: one row per matching
        property/prediction pair with the columns listed in the SELECT.
    """
    zillow_url = env.get_url('zillow')
    # Adjacent literals are concatenated by the parser; each piece starts with
    # the separating space so the clauses never run together.
    query = (
        " select prop.parcelid , pred.logerror , pred.transactiondate"
        " , bathroomcnt , bedroomcnt , calculatedfinishedsquarefeet"
        " , fips , latitude , longitude , lotsizesquarefeet"
        " , regionidcity , regionidcounty , regionidneighborhood , regionidzip"
        " , yearbuilt , structuretaxvaluedollarcnt , taxvaluedollarcnt"
        " , landtaxvaluedollarcnt , taxamount"
        " from properties_2017 prop"
        " inner join predictions_2017 pred on prop.parcelid = pred.parcelid"
        " where propertylandusetypeid = 261"
        " and structuretaxvaluedollarcnt < 1000000; "
    )
    return pd.read_sql(query, zillow_url)
def get_zillow_data():
    """Load 2017 properties with their latest prediction and lookup labels.

    The query selects every ``properties_2017`` column plus ``logerror`` and
    ``transactiondate`` from ``predictions_2017``, keeping only the prediction
    whose ``transactiondate`` is the maximum for its ``parcelid``. Description
    columns are attached by left-joining the air-conditioning, architectural
    style, building class, heating, land use, story and construction type
    lookup tables. Rows with a NULL ``latitude`` or ``longitude`` are excluded.

    Returns:
        The result of ``pd.read_sql`` for the query.
    """
    query = (
        " SELECT prop.*, pred1.logerror, pred1.transactiondate,"
        " air.airconditioningdesc, arch.architecturalstyledesc,"
        " build.buildingclassdesc, heat.heatingorsystemdesc,"
        " landuse.propertylandusedesc, story.storydesc,"
        " construct.typeconstructiondesc"
        " FROM properties_2017 prop"
        " LEFT JOIN predictions_2017 pred1 USING (parcelid)"
        # The inner join on pred2 keeps only the most recent transaction per
        # parcel; it also drops properties that have no prediction row.
        " INNER JOIN (SELECT parcelid, Max(transactiondate) maxtransactiondate"
        " FROM predictions_2017 GROUP BY parcelid) pred2"
        " ON pred1.parcelid = pred2.parcelid"
        " AND pred1.transactiondate = pred2.maxtransactiondate"
        " LEFT JOIN airconditioningtype air USING (airconditioningtypeid)"
        " LEFT JOIN architecturalstyletype arch USING (architecturalstyletypeid)"
        " LEFT JOIN buildingclasstype build USING (buildingclasstypeid)"
        " LEFT JOIN heatingorsystemtype heat USING (heatingorsystemtypeid)"
        " LEFT JOIN propertylandusetype landuse USING (propertylandusetypeid)"
        " LEFT JOIN storytype story USING (storytypeid)"
        " LEFT JOIN typeconstructiontype construct USING (typeconstructiontypeid)"
        " WHERE prop.latitude IS NOT NULL AND prop.longitude IS NOT NULL; "
    )
    return pd.read_sql(query, get_url('zillow'))
def get_components(component_types):
    """Return the components listed by the ``/api/projects/search`` endpoint.

    Issues a GET against ``env.get_url() + '/api/projects/search'`` using the
    credentials from ``env.get_credentials()``, with the query parameters
    ``ps=500`` and ``qualifiers=component_types``.

    Args:
        component_types: Value sent as the ``qualifiers`` query parameter.

    Returns:
        The value stored under the ``'components'`` key of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the decoded response has no ``'components'`` key.
    """
    # NOTE(review): ``ps`` looks like a page size, in which case only the
    # first 500 results are ever fetched - confirm against the API docs.
    params = dict(ps=500, qualifiers=component_types)
    resp = requests.get(
        url=env.get_url() + '/api/projects/search',
        auth=env.get_credentials(),
        params=params,
    )
    # Surface HTTP failures directly; without this an error response shows up
    # later as a confusing JSON decode error or a KeyError on 'components'.
    resp.raise_for_status()
    data = json.loads(resp.text)
    return data['components']
def get_titanic_data():
    """Load every row and column of the ``passengers`` table.

    The connection URL comes from ``env.get_url('titanic_db')``.

    Returns:
        The result of ``pd.read_sql`` for ``SELECT * FROM passengers``.
    """
    return pd.read_sql(" SELECT * FROM passengers ", env.get_url('titanic_db'))
def get_iris_data():
    """Load the iris measurements together with their species name.

    Joins ``measurements`` to ``species`` on ``species_id`` and selects all
    measurement columns plus ``species_name``. The connection URL comes from
    ``env.get_url('iris_db')``.

    Returns:
        The result of ``pd.read_sql`` for the join query.
    """
    iris_url = env.get_url('iris_db')
    sql = (
        " SELECT m.*, s.species_name"
        " FROM measurements m"
        " JOIN species s ON s.species_id = m.species_id; "
    )
    return pd.read_sql(sql, iris_url)
def wrangle_telco():
    """Fetch and clean the charges data for customers on a 'Two year' contract.

    Reads ``customer_id``, ``monthly_charges``, ``tenure`` and
    ``total_charges`` from ``customers`` joined to ``contract_types``, then:

    * turns every value equal to a single space (``' '``) into NaN,
    * drops every row that contains a missing value,
    * casts ``total_charges`` to float.

    Returns:
        The cleaned dataframe.
    """
    sql = (
        " SELECT customer_id, monthly_charges, tenure, total_charges"
        " FROM customers"
        " JOIN contract_types USING (contract_type_id)"
        " WHERE contract_type = 'Two year' "
    )
    raw = pd.read_sql(sql, get_url('telco_churn'))
    # Blank (single-space) entries become NaN so dropna() discards those rows
    # before the float conversion is attempted.
    cleaned = raw.replace(' ', np.nan).dropna()
    cleaned['total_charges'] = cleaned['total_charges'].astype('float')
    return cleaned