Example #1
0
def movie(compare_column=None, df=None):
    '''
    Creates an animation of the beaches E. coli levels represented as circles.
    The circle's radius is proportional to the log of the E. coli levels;
    beaches without exactly one reading for a frame, and readings that are
    missing or not greater than 1, are drawn with a radius of zero.
    Additionally, when the E. coli level is at or above the threshold of
    235 PPM, the circle color changes from blue to purple. You can optionally
    choose to vary the background color of the animation with another column
    of data, however, this does not seem like a great way to visualize the
    relationship between E. coli levels and another data-stream.

    Inputs
    ------
    compare_column : The name of the column that will be used to vary the
                     background color. If compare_column is None, then the
                     background color will remain static.
    df             : The dataframe to use. If None, then the dataframe will be
                     read in using read_data. It is indexed by timestamp and
                     must provide the 'Client.ID' and 'Escherichia.coli'
                     columns.

    Returns
    -------
    anim : The animation object.

    Example
    -------
    >>> import read_data as rd
    >>> import visualizations as viz
    >>> df = rd.read_data()
    >>> viz.movie(df=df)
    '''

    if df is None:
        df = read_data.read_data()
    to_compare = compare_column is not None

    if to_compare:
        compare_values = df[compare_column].dropna()
        compare_min = compare_values.min()
        compare_rng = compare_values.max() - compare_min
        # NOTE(review): despite the names, bg_min_color is shown at the
        # column's maximum and bg_max_color at its minimum; this mapping is
        # kept as it was -- confirm it is intended.
        bg_min_color = np.array([.75, .5, .2])
        bg_max_color = np.array([.999, .999, 0.9])

    file_name = '../data/ExternalData/Beach_Locations.csv'
    beach_locs = read_data.read_locations(file_name)

    # Approximate planar projection of lat/longs: scale degrees by fixed
    # per-degree factors, shrinking longitude by cos(phi). phi is a reference
    # latitude in radians (about 41.84 degrees).
    phi = 0.730191653
    ys = beach_locs['Latitude'] * 110574.0
    xs = beach_locs['Longitude'] * 111320.0 * np.cos(phi)

    lat_min = ys.min()
    lat_max = ys.max()
    lat_rng = lat_max - lat_min
    lon_min = xs.min()
    lon_max = xs.max()
    lon_rng = lon_max - lon_min

    # Pair every beach with its projected position row by row, so the lookup
    # does not depend on how beach_locs happens to be indexed.
    beaches = list(zip(beach_locs['Beach'], xs, ys))

    above_color = (0.301, 0, 1, 0.75)   # reading >= 235
    below_color = (0, 0.682, 1, 0.75)   # reading < 235

    def generate_index():
        '''Yield (timestamp, compare) for every timestamp worth drawing.'''
        for timestamp in df.index.unique():
            # A list indexer always returns a DataFrame, whether the
            # timestamp labels one row or several.
            rows = df.loc[[timestamp]]
            compare = None
            if to_compare:
                compare = rows[compare_column].dropna().mean()
                if np.isnan(compare):
                    continue
            # Only emit frames that have at least one non-missing reading.
            if rows['Escherichia.coli'].count():
                yield timestamp, compare

    def animate(timestamp_and_compare):
        '''Update the background, circles and title for one frame.'''
        timestamp, compare = timestamp_and_compare

        if to_compare:
            # Normalise over the observed range (not just the maximum) so the
            # blend weight always stays inside [0, 1].
            if compare_rng > 0:
                weight = float(np.clip(
                    (compare - compare_min) / compare_rng, 0., 1.))
            else:
                weight = 0.
            bg_color = bg_min_color * weight + bg_max_color * (1. - weight)
            # set_axis_bgcolor was removed from newer matplotlib releases;
            # fall back to it only when set_facecolor is unavailable.
            if hasattr(ax, 'set_facecolor'):
                ax.set_facecolor(bg_color)
            else:
                ax.set_axis_bgcolor(bg_color)

        # Look the frame's rows up once instead of once per beach.
        rows = df.loc[[timestamp]]
        client_ids = rows['Client.ID']
        readings = rows['Escherichia.coli'].values

        for beach, x, y in beaches:
            matches = readings[(client_ids == beach).values]
            # A beach is drawn only when it has exactly one reading.
            ecoli = float(matches[0]) if len(matches) == 1 else 0.
            if np.isnan(ecoli):
                ecoli = 0.

            # np.log(0) is -inf and readings below 1 give a negative value,
            # neither of which is a usable radius.
            radius = 200 * np.log(ecoli) if ecoli > 1 else 0.

            circ = circles.get(beach)
            if circ is None:
                circ = plt.Circle((x, y), radius=radius, edgecolor='none')
                ax.add_artist(circ)
                # Keep the artist itself rather than an index into
                # ax.artists, whose contents differ between matplotlib
                # versions.
                circles[beach] = circ
            else:
                circ.set_radius(radius)
            circ.set_facecolor(above_color if ecoli >= 235 else below_color)

        ax.title.set_text(timestamp.strftime('%d %B %Y'))
        return ax

    fig = plt.figure(figsize=(18, 10))
    ax = fig.gca()
    ax.set_xlim([lon_min - lon_rng * 0.4, lon_max + lon_rng * 0.15])
    ax.set_ylim([lat_min - lat_rng * 0.2, lat_max + lat_rng * 0.2])
    ax.set_aspect('equal')
    circles = {}

    anim = animation.FuncAnimation(fig, animate, generate_index, repeat=False)
    plt.show(block=TO_BLOCK)

    return anim
def movie(compare_column=None, df=None):
    '''
    Creates an animation of the beaches E. coli levels represented as circles.
    The circle's radius is proportional to the log of the E. coli levels;
    beaches without exactly one reading for a frame, and readings that are
    missing or not greater than 1, are drawn with a radius of zero.
    Additionally, when the E. coli level is at or above the threshold of
    235 PPM, the circle color changes from blue to purple. You can optionally
    choose to vary the background color of the animation with another column
    of data, however, this does not seem like a great way to visualize the
    relationship between E. coli levels and another data-stream.

    Inputs
    ------
    compare_column : The name of the column that will be used to vary the
                     background color. If compare_column is None, then the
                     background color will remain static.
    df             : The dataframe to use. If None, then the dataframe will be
                     read in using read_data. It is indexed by timestamp and
                     must provide the 'Client.ID' and 'Escherichia.coli'
                     columns.

    Returns
    -------
    anim : The animation object.

    Example
    -------
    >>> import read_data as rd
    >>> import visualizations as viz
    >>> df = rd.read_data()
    >>> viz.movie(df=df)
    '''

    if df is None:
        df = read_data.read_data()
    to_compare = compare_column is not None

    if to_compare:
        compare_series = df[compare_column].dropna()
        compare_min = compare_series.min()
        compare_span = compare_series.max() - compare_min
        # NOTE(review): despite the names, bg_min_color is shown at the
        # column's maximum and bg_max_color at its minimum; this mapping is
        # kept as it was -- confirm it is intended.
        bg_min_color = np.array([.75, .5, .2])
        bg_max_color = np.array([.999, .999, 0.9])

    file_name = '../data/ExternalData/Beach_Locations.csv'
    beach_locs = read_data.read_locations(file_name)

    # Approximate planar projection of lat/longs: scale degrees by fixed
    # per-degree factors, shrinking longitude by cos(phi). phi is a reference
    # latitude in radians (about 41.84 degrees).
    phi = 0.730191653
    northings = beach_locs['Latitude'] * 110574.0
    eastings = beach_locs['Longitude'] * 111320.0 * np.cos(phi)

    lat_min = northings.min()
    lat_max = northings.max()
    lat_rng = lat_max - lat_min
    lon_min = eastings.min()
    lon_max = eastings.max()
    lon_rng = lon_max - lon_min

    # Pair every beach with its projected position row by row, so the lookup
    # does not depend on how beach_locs happens to be indexed.
    beach_positions = list(zip(beach_locs['Beach'], eastings, northings))

    color_exceeds = (0.301, 0, 1, 0.75)   # reading >= 235
    color_normal = (0, 0.682, 1, 0.75)    # reading < 235

    def generate_index():
        '''Yield (timestamp, compare) for every timestamp worth drawing.'''
        for timestamp in df.index.unique():
            # A list indexer always returns a DataFrame, whether the
            # timestamp labels one row or several.
            frame_rows = df.loc[[timestamp]]
            compare = None
            if to_compare:
                compare = frame_rows[compare_column].dropna().mean()
                if np.isnan(compare):
                    continue
            # Only emit frames that have at least one non-missing reading.
            if frame_rows['Escherichia.coli'].count():
                yield timestamp, compare

    def animate(timestamp_and_compare):
        '''Update the background, circles and title for one frame.'''
        timestamp, compare = timestamp_and_compare

        if to_compare:
            # Normalise over the observed range (not just the maximum) so the
            # blend weight always stays inside [0, 1].
            if compare_span > 0:
                blend = float(np.clip(
                    (compare - compare_min) / compare_span, 0., 1.))
            else:
                blend = 0.
            bg_color = bg_min_color * blend + bg_max_color * (1. - blend)
            # set_axis_bgcolor was removed from newer matplotlib releases;
            # fall back to it only when set_facecolor is unavailable.
            if hasattr(ax, 'set_facecolor'):
                ax.set_facecolor(bg_color)
            else:
                ax.set_axis_bgcolor(bg_color)

        # Look the frame's rows up once instead of once per beach.
        frame_rows = df.loc[[timestamp]]
        frame_ids = frame_rows['Client.ID']
        frame_readings = frame_rows['Escherichia.coli'].values

        for beach, x, y in beach_positions:
            hits = frame_readings[(frame_ids == beach).values]
            # A beach is drawn only when it has exactly one reading.
            ecoli = float(hits[0]) if len(hits) == 1 else 0.
            if np.isnan(ecoli):
                ecoli = 0.

            # np.log(0) is -inf and readings below 1 give a negative value,
            # neither of which is a usable radius.
            radius = 200 * np.log(ecoli) if ecoli > 1 else 0.

            circle = beach_circles.get(beach)
            if circle is None:
                circle = plt.Circle((x, y), radius=radius, edgecolor='none')
                ax.add_artist(circle)
                # Keep the artist itself rather than an index into
                # ax.artists, whose contents differ between matplotlib
                # versions.
                beach_circles[beach] = circle
            else:
                circle.set_radius(radius)
            circle.set_facecolor(
                color_exceeds if ecoli >= 235 else color_normal)

        ax.title.set_text(timestamp.strftime('%d %B %Y'))
        return ax

    fig = plt.figure(figsize=(18, 10))
    ax = fig.gca()
    ax.set_xlim([lon_min - lon_rng * 0.4, lon_max + lon_rng * 0.15])
    ax.set_ylim([lat_min - lat_rng * 0.2, lat_max + lat_rng * 0.2])
    ax.set_aspect('equal')
    beach_circles = {}

    anim = animation.FuncAnimation(fig, animate, generate_index, repeat=False)
    plt.show(block=TO_BLOCK)

    return anim
def movie(data_column, lat_longs, df=None):
    '''
    Renders an animation of the beaches E. coli levels as circles and saves
    it to 'test.mp4' at 30 frames per second using the FFMpeg writer.

    Each beach is drawn as a circle whose radius is proportional to the log
    of its E. coli reading; beaches without exactly one reading for a frame,
    and readings that are missing or not greater than 1, are drawn with a
    radius of zero. The circle uses one face color when the reading is at or
    above 235 and another when it is below. The title shows the frame's date.

    Inputs
    ------
    data_column : Currently unused; kept so existing callers keep working.
    lat_longs   : Currently unused; kept so existing callers keep working.
    df          : The dataframe to use. If None, then the dataframe will be
                  read in using read_data. It is indexed by timestamp and
                  must provide the 'Client.ID' and 'Escherichia.coli'
                  columns.

    Returns
    -------
    anim : The animation object.
    '''

    if df is None:
        df = read_data.read_data()

    file_name = '../data/ExternalData/Beach_Locations.csv'
    beach_locs = read_data.read_locations(file_name)

    # Approximate planar projection of lat/longs: scale degrees by fixed
    # per-degree factors, shrinking longitude by cos(phi). phi is a reference
    # latitude in radians (about 41.84 degrees).
    phi = 0.730191653
    ys = beach_locs['Latitude'] * 110574.0
    xs = beach_locs['Longitude'] * 111320.0 * np.cos(phi)

    lat_min = ys.min()
    lat_max = ys.max()
    lat_rng = lat_max - lat_min
    lon_min = xs.min()
    lon_max = xs.max()
    lon_rng = lon_max - lon_min

    # Pair every beach with its projected position row by row, so the lookup
    # does not depend on how beach_locs happens to be indexed.
    beaches = list(zip(beach_locs['Beach'], xs, ys))

    above_color = (0.862, 0.357, 0.276, 0.8)   # reading >= 235
    below_color = (0.262, 0.357, 0.576, 0.8)   # reading < 235

    def generate_index():
        '''Yield every timestamp that has at least one non-missing reading.'''
        for timestamp in df.index.unique():
            # A list indexer always returns a Series here, whether the
            # timestamp labels one row or several.
            if df.loc[[timestamp], 'Escherichia.coli'].count():
                yield timestamp

    def animate(timestamp):
        '''Update the circles and title for one frame.'''
        # Look the frame's rows up once instead of once per beach.
        rows = df.loc[[timestamp]]
        client_ids = rows['Client.ID']
        readings = rows['Escherichia.coli'].values

        for beach, x, y in beaches:
            matches = readings[(client_ids == beach).values]
            # A beach is drawn only when it has exactly one reading.
            ecoli = float(matches[0]) if len(matches) == 1 else 0.
            if np.isnan(ecoli):
                ecoli = 0.

            # np.log(0) is -inf and readings below 1 give a negative value,
            # neither of which is a usable radius.
            radius = 200 * np.log(ecoli) if ecoli > 1 else 0.

            circ = circles.get(beach)
            if circ is None:
                circ = plt.Circle((x, y), radius=radius)
                ax.add_artist(circ)
                # Keep the artist itself rather than an index into
                # ax.artists, whose contents differ between matplotlib
                # versions.
                circles[beach] = circ
            else:
                circ.set_radius(radius)
            circ.set_facecolor(above_color if ecoli >= 235 else below_color)

        ax.title.set_text(timestamp.strftime('%d %B %Y'))
        return ax

    fig = plt.figure(figsize=(18, 10))
    ax = fig.gca()
    ax.set_xlim([lon_min - lon_rng * 0.4, lon_max + lon_rng * 0.15])
    ax.set_ylim([lat_min - lat_rng * 0.2, lat_max + lat_rng * 0.2])
    ax.set_aspect('equal')
    circles = {}

    # Materialise the frames: the length of a generator cannot be inferred,
    # and older matplotlib releases then cap the saved movie at the
    # save_count fallback (100 frames) instead of writing every timestamp.
    frames = list(generate_index())
    anim = animation.FuncAnimation(fig, animate, frames, repeat=False)

    mywriter = animation.FFMpegWriter(fps=30)
    anim.save('test.mp4', writer=mywriter)

    return anim