# Load the taxi-ride data set and derive robust plotting bounds for the
# pickup coordinates, plus an array of airport coordinates.
#
# No import of zipfile, numpy or pandas is visible before their first use in
# this section, so they are stated here; re-importing is harmless if they are
# already imported earlier in the file.
import zipfile

import numpy as np
import pandas

# Our own module import.
from mad import median_mad

# Import the data
print('Importing...')
# The archive is opened in a ``with`` block so the handle is closed once the
# CSV has been read, instead of staying open for the life of the process.
with zipfile.ZipFile('data/taxirides.csv.zip') as zipped:
    file_list = zipped.namelist()
    # Only the first member of the archive is read.
    file_in_archive = file_list[0]
    with zipped.open(file_in_archive) as f:
        data = pandas.read_csv(f)

print('Calculating...')
# Median and MAD of the pickup longitude.  The original computed this pair
# twice with identical arguments; once is enough (assumes median_mad has no
# side effects -- TODO confirm against mad.py).
med_long, mad_long = median_mad(data.pickup_longitude)
std_long = np.std(data.pickup_longitude)

# Longitude window: median +/- 20 MADs.
long_lo = med_long - 20*mad_long
long_hi = med_long + 20*mad_long

# Latitude window: median +/- 10 MADs.
med_lat, mad_lat = median_mad(data.pickup_latitude)
lat_lo = med_lat - 10*mad_lat
lat_hi = med_lat + 10*mad_lat

# Airport coordinates, one row per airport in ``airports``.
# NOTE(review): values look like [latitude, longitude] pairs -- confirm.
nwr = [40.69, -74.174]
jfk = [40.641, -73.778]
lga = [40.777, -73.874]
airports = np.array([nwr, jfk, lga])
# Plot taxi drop-off locations, with the axes clipped to a robust
# (median +/- k * MAD) window computed from the pickup coordinates.
import zipfile

import matplotlib.pyplot as plt
from numpy import array
import pandas

from mad import median_mad

# The archive is opened in a ``with`` block so the handle is closed once the
# CSV has been read, instead of staying open for the life of the process.
with zipfile.ZipFile('data/taxirides.csv.zip') as zipped:
    file_list = zipped.namelist()
    # Only the first member of the archive is read.
    file_in_archive = file_list[0]
    with zipped.open(file_in_archive) as f:
        data = pandas.read_csv(f)

# Longitude window: median +/- 20 MADs of the pickup longitude.
median, mad = median_mad(data.pickup_longitude)
long_lo = median - 20*mad
long_hi = median + 20*mad

# Latitude window: median +/- 10 MADs of the pickup latitude.
# ``median`` and ``mad`` are deliberately reused, so after this point they
# hold the latitude statistics.
median, mad = median_mad(data.pickup_latitude)
lat_lo = median - 10*mad
lat_hi = median + 10*mad

plt.subplots(figsize=(4, 3))
# 'k,' draws each ride as a single black pixel.
# NOTE(review): the points plotted are drop-off coordinates while the axis
# limits come from pickup coordinates -- confirm this is intended.
plt.plot(data.dropoff_longitude, data.dropoff_latitude, 'k,')

# Equal data scaling on both axes, then clip to the windows computed above.
plt.axis('scaled')
plt.xlim(long_lo, long_hi)
plt.ylim(lat_lo, lat_hi)