def movie(compare_column=None, df=None):
    '''
    Creates an animation of the beaches E. coli levels represented as circles.
    The circle's radius is proportional to the log of the E. coli levels
    (clamped at zero, so missing readings and readings of 1 or less draw
    no circle). Additionally, when the E. coli level is at or above the
    threshold of 235 PPM, the circle color changes from blue to purple.
    You can optionally choose to vary the background color of the animation
    with another column of data, however, this does not seem like a great way
    to visualize the relationship between E. coli levels and another
    data-stream.

    Inputs
    ------
    compare_column : The name of the column that will be used to vary
                     the background color. If compare_column is None, then
                     the background color will remain static.
    df             : The dataframe to use. If None, then the dataframe will
                     be read in using read_data.

    Returns
    -------
    anim : The animation object.

    Example
    -------
    >>> import read_data as rd
    >>> import visualizations as viz
    >>> df = rd.read_data()
    >>> viz.movie(df=df)
    '''
    if df is None:
        df = read_data.read_data()

    to_compare = compare_column is not None

    if to_compare:
        compare_values = df[compare_column].dropna()
        compare_min = compare_values.min()
        compare_span = compare_values.max() - compare_min
        bg_min_color = np.array([.75, .5, .2])
        bg_max_color = np.array([.999, .999, 0.9])

    file_name = '../data/ExternalData/Beach_Locations.csv'
    beach_locs = read_data.read_locations(file_name)

    # Flat-earth scaling of lat/longs (not a true Mercator projection):
    # degrees are multiplied by a per-degree length and longitude is shrunk
    # by cos(phi). phi is about 41.84 degrees expressed in radians --
    # presumably the latitude of the beaches; the constants look like
    # metres per degree (TODO confirm).
    phi = 0.730191653
    beach_locs['Latitude'] = beach_locs['Latitude'] * 110574.0
    beach_locs['Longitude'] = beach_locs['Longitude'] * 111320.0 * np.cos(phi)

    lat_min = beach_locs['Latitude'].min()
    lat_max = beach_locs['Latitude'].max()
    lat_rng = lat_max - lat_min
    lon_min = beach_locs['Longitude'].min()
    lon_max = beach_locs['Longitude'].max()
    lon_rng = lon_max - lon_min

    # (beach, x, y) triples, built once so every frame iterates the beaches
    # positionally instead of doing a per-beach index lookup.
    beaches = list(zip(beach_locs['Beach'],
                       beach_locs['Longitude'],
                       beach_locs['Latitude']))

    threshold = 235
    above_color = (0.301, 0, 1, 0.75)
    below_color = (0, 0.682, 1, 0.75)

    fig = plt.figure(figsize=(18, 10))
    ax = fig.gca()
    ax.set_xlim([lon_min - lon_rng * 0.4, lon_max + lon_rng * 0.15])
    ax.set_ylim([lat_min - lat_rng * 0.2, lat_max + lat_rng * 0.2])
    ax.set_aspect('equal')

    # Maps beach name -> its Circle. Holding the artist itself avoids
    # depending on the circle's position inside ax.artists.
    circles = {}

    def generate_index():
        # Yields (timestamp, compare) for every timestamp that has at least
        # one E. coli reading and, when comparing, a non-NaN compare value.
        for timestamp in df.index.unique():
            compare = None
            if to_compare:
                compare = df.loc[timestamp, compare_column]
                if isinstance(compare, pd.Series):
                    compare = compare.dropna().mean()
                if np.isnan(compare):
                    continue
            readings = df.loc[timestamp, 'Escherichia.coli']
            if np.ndim(readings):
                # several rows share this timestamp
                has_reading = readings.count() > 0
            else:
                has_reading = not np.isnan(readings)
            if has_reading:
                yield timestamp, compare

    def animate(timestamp_and_compare):
        timestamp, compare = timestamp_and_compare

        if to_compare:
            # Normalise to [0, 1] over the column's full range; a constant
            # (or empty) column maps to 0 instead of dividing by zero.
            if compare_span > 0:
                frac = (compare - compare_min) / compare_span
                frac = min(max(frac, 0.), 1.)
            else:
                frac = 0.
            bg_color = bg_min_color * frac + bg_max_color * (1. - frac)
            ax.patch.set_facecolor(bg_color)

        # Look the frame's rows up once rather than once per beach.
        client_ids = df.loc[timestamp, 'Client.ID']
        readings = df.loc[timestamp, 'Escherichia.coli']
        single_row = np.ndim(client_ids) == 0

        for beach, x, y in beaches:
            if single_row:
                ecoli = float(readings) if client_ids == beach else 0.
            else:
                matches = readings[client_ids == beach]
                # a beach is only drawn when it has exactly one reading
                ecoli = float(matches.iloc[0]) if len(matches) == 1 else 0.

            # log() is -inf at 0 and negative below 1; clamp so the radius
            # is never negative or infinite (NaN readings also fall here).
            radius = 200 * np.log(ecoli) if ecoli > 1 else 0.

            circle = circles.get(beach)
            if circle is None:
                circle = plt.Circle((x, y), radius=radius, edgecolor='none')
                ax.add_artist(circle)
                circles[beach] = circle
            else:
                circle.set_radius(radius)
            circle.set_facecolor(
                above_color if ecoli >= threshold else below_color)

        ax.title.set_text(timestamp.strftime('%d %B %Y'))
        return ax

    anim = animation.FuncAnimation(fig, animate, generate_index, repeat=False)
    plt.show(block=TO_BLOCK)

    return anim
def movie(compare_column=None, df=None):
    '''
    Creates an animation of the beaches E. coli levels represented as circles.
    The circle's radius is proportional to the log of the E. coli levels
    (clamped at zero, so missing readings and readings of 1 or less draw
    no circle). Additionally, when the E. coli level is at or above the
    threshold of 235 PPM, the circle color changes from blue to purple.
    You can optionally choose to vary the background color of the animation
    with another column of data, however, this does not seem like a great way
    to visualize the relationship between E. coli levels and another
    data-stream.

    Inputs
    ------
    compare_column : The name of the column that will be used to vary
                     the background color. If compare_column is None, then
                     the background color will remain static.
    df             : The dataframe to use. If None, then the dataframe will
                     be read in using read_data.

    Returns
    -------
    anim : The animation object.

    Example
    -------
    >>> import read_data as rd
    >>> import visualizations as viz
    >>> df = rd.read_data()
    >>> viz.movie(df=df)
    '''
    if df is None:
        df = read_data.read_data()

    to_compare = compare_column is not None

    if to_compare:
        compare_values = df[compare_column].dropna()
        compare_min = compare_values.min()
        compare_span = compare_values.max() - compare_min
        bg_min_color = np.array([.75, .5, .2])
        bg_max_color = np.array([.999, .999, 0.9])

    file_name = '../data/ExternalData/Beach_Locations.csv'
    beach_locs = read_data.read_locations(file_name)

    # Flat-earth scaling of lat/longs (not a true Mercator projection):
    # degrees are multiplied by a per-degree length and longitude is shrunk
    # by cos(phi). phi is about 41.84 degrees expressed in radians --
    # presumably the latitude of the beaches; the constants look like
    # metres per degree (TODO confirm).
    phi = 0.730191653
    beach_locs['Latitude'] = beach_locs['Latitude'] * 110574.0
    beach_locs['Longitude'] = beach_locs['Longitude'] * 111320.0 * np.cos(phi)

    lat_min = beach_locs['Latitude'].min()
    lat_max = beach_locs['Latitude'].max()
    lat_rng = lat_max - lat_min
    lon_min = beach_locs['Longitude'].min()
    lon_max = beach_locs['Longitude'].max()
    lon_rng = lon_max - lon_min

    # (beach, x, y) triples, built once so every frame iterates the beaches
    # positionally instead of doing a per-beach index lookup.
    beaches = list(zip(beach_locs['Beach'],
                       beach_locs['Longitude'],
                       beach_locs['Latitude']))

    threshold = 235
    above_color = (0.301, 0, 1, 0.75)
    below_color = (0, 0.682, 1, 0.75)

    fig = plt.figure(figsize=(18, 10))
    ax = fig.gca()
    ax.set_xlim([lon_min - lon_rng * 0.4, lon_max + lon_rng * 0.15])
    ax.set_ylim([lat_min - lat_rng * 0.2, lat_max + lat_rng * 0.2])
    ax.set_aspect('equal')

    # Maps beach name -> its Circle. Holding the artist itself avoids
    # depending on the circle's position inside ax.artists.
    circles = {}

    def generate_index():
        # Yields (timestamp, compare) for every timestamp that has at least
        # one E. coli reading and, when comparing, a non-NaN compare value.
        for timestamp in df.index.unique():
            compare = None
            if to_compare:
                compare = df.loc[timestamp, compare_column]
                if isinstance(compare, pd.Series):
                    compare = compare.dropna().mean()
                if np.isnan(compare):
                    continue
            readings = df.loc[timestamp, 'Escherichia.coli']
            if np.ndim(readings):
                # several rows share this timestamp
                has_reading = readings.count() > 0
            else:
                has_reading = not np.isnan(readings)
            if has_reading:
                yield timestamp, compare

    def animate(timestamp_and_compare):
        timestamp, compare = timestamp_and_compare

        if to_compare:
            # Normalise to [0, 1] over the column's full range; a constant
            # (or empty) column maps to 0 instead of dividing by zero.
            if compare_span > 0:
                frac = (compare - compare_min) / compare_span
                frac = min(max(frac, 0.), 1.)
            else:
                frac = 0.
            bg_color = bg_min_color * frac + bg_max_color * (1. - frac)
            ax.patch.set_facecolor(bg_color)

        # Look the frame's rows up once rather than once per beach.
        client_ids = df.loc[timestamp, 'Client.ID']
        readings = df.loc[timestamp, 'Escherichia.coli']
        single_row = np.ndim(client_ids) == 0

        for beach, x, y in beaches:
            if single_row:
                ecoli = float(readings) if client_ids == beach else 0.
            else:
                matches = readings[client_ids == beach]
                # a beach is only drawn when it has exactly one reading
                ecoli = float(matches.iloc[0]) if len(matches) == 1 else 0.

            # log() is -inf at 0 and negative below 1; clamp so the radius
            # is never negative or infinite (NaN readings also fall here).
            radius = 200 * np.log(ecoli) if ecoli > 1 else 0.

            circle = circles.get(beach)
            if circle is None:
                circle = plt.Circle((x, y), radius=radius, edgecolor='none')
                ax.add_artist(circle)
                circles[beach] = circle
            else:
                circle.set_radius(radius)
            circle.set_facecolor(
                above_color if ecoli >= threshold else below_color)

        ax.title.set_text(timestamp.strftime('%d %B %Y'))
        return ax

    anim = animation.FuncAnimation(fig, animate, generate_index, repeat=False)
    plt.show(block=TO_BLOCK)

    return anim
def movie(data_column, lat_longs, df=None):
    '''
    Creates an animation of the beaches E. coli levels represented as circles
    and saves it to 'test.mp4' using the FFMpeg writer at 30 fps.
    The circle's radius is proportional to the log of the E. coli levels
    (clamped at zero, so missing readings and readings of 1 or less draw
    no circle). When the E. coli level is at or above 235 the circle is
    drawn in a different color.

    Inputs
    ------
    data_column : Currently unused; kept so existing callers keep working.
    lat_longs   : Currently unused; beach locations are read from
                  '../data/ExternalData/Beach_Locations.csv' instead.
    df          : The dataframe to use. If None, then the dataframe will
                  be read in using read_data.

    Returns
    -------
    anim : The animation object.
    '''
    if df is None:
        df = read_data.read_data()

    file_name = '../data/ExternalData/Beach_Locations.csv'
    beach_locs = read_data.read_locations(file_name)

    # Flat-earth scaling of lat/longs: degrees are multiplied by a
    # per-degree length and longitude is shrunk by cos(phi). phi is about
    # 41.84 degrees expressed in radians -- presumably the latitude of the
    # beaches; the constants look like metres per degree (TODO confirm).
    phi = 0.730191653
    beach_locs['Latitude'] = beach_locs['Latitude'] * 110574.0
    beach_locs['Longitude'] = beach_locs['Longitude'] * 111320.0 * np.cos(phi)

    lat_min = beach_locs['Latitude'].min()
    lat_max = beach_locs['Latitude'].max()
    lat_rng = lat_max - lat_min
    lon_min = beach_locs['Longitude'].min()
    lon_max = beach_locs['Longitude'].max()
    lon_rng = lon_max - lon_min

    # (beach, x, y) triples, built once so every frame iterates the beaches
    # positionally instead of doing a per-beach index lookup.
    beaches = list(zip(beach_locs['Beach'],
                       beach_locs['Longitude'],
                       beach_locs['Latitude']))

    threshold = 235
    above_color = (0.862, 0.357, 0.276, 0.8)
    below_color = (0.262, 0.357, 0.576, 0.8)

    fig = plt.figure(figsize=(18, 10))
    ax = fig.gca()
    ax.set_xlim([lon_min - lon_rng * 0.4, lon_max + lon_rng * 0.15])
    ax.set_ylim([lat_min - lat_rng * 0.2, lat_max + lat_rng * 0.2])
    ax.set_aspect('equal')

    # Maps beach name -> its Circle. Holding the artist itself avoids
    # depending on the circle's position inside ax.artists.
    circles = {}

    def generate_index():
        # Yields every timestamp that has at least one E. coli reading.
        for timestamp in df.index.unique():
            readings = df.loc[timestamp, 'Escherichia.coli']
            if np.ndim(readings):
                # several rows share this timestamp
                has_reading = readings.count() > 0
            else:
                has_reading = not np.isnan(readings)
            if has_reading:
                yield timestamp

    def animate(timestamp):
        # Look the frame's rows up once rather than once per beach.
        client_ids = df.loc[timestamp, 'Client.ID']
        readings = df.loc[timestamp, 'Escherichia.coli']
        single_row = np.ndim(client_ids) == 0

        for beach, x, y in beaches:
            if single_row:
                ecoli = float(readings) if client_ids == beach else 0.
            else:
                matches = readings[client_ids == beach]
                # a beach is only drawn when it has exactly one reading
                ecoli = float(matches.iloc[0]) if len(matches) == 1 else 0.

            # log() is -inf at 0 and negative below 1; clamp so the radius
            # is never negative or infinite (NaN readings also fall here).
            radius = 200 * np.log(ecoli) if ecoli > 1 else 0.

            circle = circles.get(beach)
            if circle is None:
                circle = plt.Circle((x, y), radius=radius)
                ax.add_artist(circle)
                circles[beach] = circle
            else:
                circle.set_radius(radius)
            circle.set_facecolor(
                above_color if ecoli >= threshold else below_color)

        ax.title.set_text(timestamp.strftime('%d %B %Y'))
        return ax

    anim = animation.FuncAnimation(fig, animate, generate_index, repeat=False)

    mywriter = animation.FFMpegWriter(fps=30)
    anim.save('test.mp4', writer=mywriter)

    return anim