def test_undefined_dataset():
    """Unknown dataset names raise AttributeError from both loaders.

    The same name is passed to ``data`` and ``local_data``; each must raise
    ``AttributeError`` carrying the identical message.
    """
    expected_message = "No dataset named 'blahblahblah'"
    for load in (data, local_data):
        with pytest.raises(AttributeError) as excinfo:
            load("blahblahblah")
        assert str(excinfo.value) == expected_message
Example #2
0
def get_cars_asp():
    """Print every item of the full query for the cars chart.

    A ``Chart`` is built over the ``cars`` dataset, the ``Horsepower`` and
    ``Displacement`` fields are selected, and each item yielded by
    ``_get_full_query()`` is printed on its own line.
    """
    cars_chart = Chart(data('cars')).field('Horsepower', 'Displacement')
    for entry in cars_chart._get_full_query():
        print(entry)
def test_load_local_dataset(name):
    """Check that all four ways of loading ``name`` agree.

    The dataset is loaded through ``data`` and ``local_data``, both by call
    (``data(name)``) and by attribute (``data.dataset_name()``), first as
    parsed frames and then as raw bytes.
    """
    # Attribute access uses underscores where dataset names use dashes.
    attr_name = name.replace("-", "_")
    loader = getattr(data, attr_name)
    local_loader = getattr(local_data, attr_name)

    reference = data(name)
    alternatives = (
        loader(),          # equivalent to data.dataset_name()
        local_data(name),
        local_loader(),    # equivalent to local_data.dataset_name()
    )
    for frame in alternatives:
        assert_frame_equal(reference, frame)

    raws = [
        loader.raw(),
        local_loader.raw(),
        data(name, return_raw=True),
        local_data(name, return_raw=True),
    ]
    # Pairwise comparison of neighbours mirrors a chained ``a == b == c == d``.
    assert all(left == right for left, right in zip(raws, raws[1:]))
    assert all(type(raw) is bytes for raw in raws)
Example #4
0
    def index(self):
        """Build the iris scatterplot and histogram entries for rendering.

        Returns:
            dict: ``{'plots': [...]}`` where each entry has a ``'title'`` and
            the ``'url'`` returned by the corresponding plotting helper.
        """
        # The dataset is loaded afresh for each plot, as before.
        scatter_url = plot_iris_scatter(data('iris'))
        histogram_url = plot_iris_histogram(data('iris'))

        return {
            'plots': [
                {'title': 'Scatterplot', 'url': scatter_url},
                {'title': 'Histogram', 'url': histogram_url},
            ]
        }
Example #5
0
    def api(self):
        """Return the requested dataset as a dict of column lists.

        The dataset name is read from the ``dataset`` request parameter.

        Raises:
            ValueError: if the parameter is missing or empty, or if the name
                is not among ``data.list_datasets()``.
        """
        requested = self.request.params.get('dataset')

        if not requested:
            raise ValueError('"dataset" value must be given')

        available = data.list_datasets()
        if requested not in available:
            raise ValueError('Dataset "{}" not found'.format(requested))

        frame = data(requested)
        return frame.to_dict(orient='list')
Example #6
0
    def index(self):
        """Render the plots.

        Builds a Bokeh scatterplot of ``petalLength`` against ``sepalLength``
        coloured by ``species``, writes the generated plot script to a ``.js``
        file under ``PLOTS_DIR`` and returns the data needed to embed it.

        Returns:
            dict: ``{'plots': [...]}`` with one entry holding the plot
            ``'title'``, the ``'div'`` markup and the ``'script_url'`` of the
            written script file.
        """
        plots = []

        # Build plot
        # The frame is only used to derive the species categories below; the
        # glyphs themselves read from the AjaxDataSource.
        df = data('iris')
        # source = ColumnDataSource(df)
        source = AjaxDataSource(data_url='/api/bokeh?dataset=iris')

        species = df['species'].unique()
        fill_color = factor_cmap('species', palette=Category10[len(species)], factors=species)

        hover = HoverTool(tooltips=[
            ('index', '$index'),
            ('petalLength', '@petalLength{0.0}'),
            ('sepalLength', '@sepalLength{0.0}'),
            ('species', '@species'),
        ])

        p = figure(title='Iris Morphology', tools=[
                   hover, 'pan', 'wheel_zoom', 'reset'], active_scroll='wheel_zoom')

        p.xaxis.axis_label = 'petalLength'
        p.yaxis.axis_label = 'sepalLength'

        p.circle(x='petalLength', y='sepalLength', color=fill_color,
                 fill_alpha=0.5, size=10, source=source)

        # Save plot
        script, div = components(p)

        basename = 'iris_scatter'
        filename = '{}/{}.js'.format(PLOTS_DIR, basename)
        url = to_url(filename)

        # ``script`` is parsed as XML and only the root element's text is
        # written out. The context manager guarantees the handle is closed
        # even if parsing or writing raises.
        with open(filename, 'w') as script_file:
            script_file.write(etree.fromstring(script).text)

        plots.append({
            'title': 'Scatterplot',
            'div': div,
            'script_url': url
        })

        return {
            'plots': plots
        }
Example #7
0
    def index(self):
        """Render the plots.

        Builds a seaborn boxplot of ``delay`` per weekday, split by ``origin``,
        from the ``flights-2k`` dataset restricted to its first five distinct
        origins.

        Returns:
            dict: ``{'plots': [...]}`` with one entry holding a ``'title'``
            and the ``'svg'`` value returned by ``save_figure``.
        """
        plots = []

        # np.random.seed(sum(map(ord, 'calmap')))
        # all_days = pd.date_range('1/15/2014', periods=1200, freq='D')
        # days = np.sort(np.random.choice(all_days, 50))

        # # Year heatmap
        # events = pd.Series(np.random.randn(len(days)), index=days)
        # path, url = plot_year_heatmap(events)
        # plots.append({
        #     'title': 'Year heatmap',
        #     'url': url
        # })

        # # Calendar heatmap
        # events = pd.Series(np.random.randn(len(days)), index=days)
        # path, url = plot_calendar_heatmap(events)
        # plots.append({
        #     'title': 'Calendar heatmap',
        #     'url': url
        # })

        # Boxplot
        df = data('flights-2k')

        first_5 = df['origin'].unique()[:5]
        # Take an explicit copy: the columns added below would otherwise be
        # assigned on a filtered selection of the original frame, which
        # triggers pandas' SettingWithCopyWarning.
        df = df[df['origin'].isin(first_5)].copy()

        df['date'] = pd.to_datetime(df['date'])
        df['weekday_abbr'] = df['date'].dt.strftime('%a')
        df['weekday_rank'] = df['date'].dt.weekday
        # Presumably sorted so the weekday categories appear in weekday order
        # on the x axis -- TODO confirm.
        df = df.sort_values('weekday_rank')

        with sns.axes_style('whitegrid'):
            ax = sns.boxplot(x='weekday_abbr',
                             y='delay',
                             hue='origin',
                             data=df,
                             dodge=True,
                             linewidth=1,
                             flierprops={
                                 'markersize': 5,
                                 'marker': '.'
                             })
            # ax = sns.swarmplot(x='weekday_abbr', y='delay', hue='origin', data=df,
            #                    dodge=True, color='.3', size=3)
            sns.despine(offset=10, trim=True)
            svg = save_figure(ax.get_figure(), SVG, 900, 450)

        # df = data('iris')

        # with sns.axes_style('whitegrid'):
        #     ax = sns.scatterplot(x='petalLength', y='sepalLength', hue='species',
        #                          palette='Spectral', data=df)
        #     svg = save_figure(ax.get_figure(), SVG, 400, 300)

        # NOTE(review): the title says 'Scatterplot' although the figure above
        # is a boxplot -- confirm whether the label should change.
        plots.append({'title': 'Scatterplot', 'svg': svg})

        # Scatterplot
        # df = data('iris')
        # path, url = plot_iris_scatter(df)
        # plots.append({
        #     'title': 'Scatterplot',
        #     'url': url
        # })

        return {'plots': plots}
Example #8
0
def get_cars_asp():
    """Print every item of the full query for the cars chart.

    A ``Chart`` is built over the ``cars`` dataset, the ``Horsepower`` and
    ``Displacement`` fields are selected, and each item yielded by
    ``_get_full_query()`` is printed on its own line.
    """
    cars_chart = Chart(data('cars')).field('Horsepower', 'Displacement')
    for entry in cars_chart._get_full_query():
        print(entry)


if __name__ == "__main__":

    # get_cars_asp()
    # exit(1)

    # First pass: a chart over the raw gapminder data with no fields chosen.
    gapminder = data('gapminder')
    chart = Chart(gapminder)
    schema = chart._get_full_query()
    print('\n\n---- HERE IS THE DEFAULT VIEW AND SCHEMA ----')
    for line in schema:
        print(line)

    # Second pass: the same data with two fields, each using scale='zero'.
    gapminder1 = Chart(gapminder)
    gapminder1 = gapminder1.field('pop', scale='zero')
    gapminder1 = gapminder1.field('life_expect', scale='zero')
    # get the default view and schema asp.
    print('\n\n---- HERE IS THE DEFAULT VIEW, SCHEMA PLUS THE QUERY ----')
    query = gapminder1._get_full_query()
    for line in query:
        print(line)
def test_date_types(name, col):
    """Column ``col`` of dataset ``name`` must have dtype ``datetime64[ns]``."""
    column = data(name)[col]
    assert column.dtype == "datetime64[ns]"
Example #10
0
#!/usr/bin/env python
# coding: utf-8

# In[1]:

from dziban.mkiv import Chart
from vega_datasets import data
from copy import deepcopy
import json
from tqdm import tqdm
import pandas as pd

# In[2]:

# Load the movies dataset; the bare ``head(1)`` is a notebook-style preview.
movies = data('movies')
movies.head(1)

# In[3]:

# Base chart over the full movies frame.
base = Chart(movies)

# In[4]:

# Column names grouped under the 'q' and 'n' keys of ``fields``
# (presumably quantitative vs. nominal -- confirm against usage).
q1, q2, q3 = 'IMDB_Rating', 'Rotten_Tomatoes_Rating', 'Worldwide_Gross'
n1, n2, n3 = 'Major_Genre', 'MPAA_Rating', 'Creative_Type'

fields = {
    'q': [q1, q2, q3],
    'n': [n1, n2, n3],
}
Example #11
0
"""
Stacked Bar Chart
-----------------

This example shows how to make a stacked bar chart of the weather type in Seattle from 2012 to 2015 by month.
"""

import altair as alt
from vega_datasets import data

weather = data('seattle-weather')

# Colour encoding: one fixed colour per weather type, with a titled legend.
weather_color = alt.Color(
    'weather:N',
    legend=alt.Legend(title='Weather type'),
    scale=alt.Scale(
        domain=['sun', 'fog', 'drizzle', 'rain', 'snow'],
        range=['#e7ba42', '#c7c7c7', '#aec7e8', '#1f77b4', '#9467bd'],
    ),
)

# X encoding: the date field with a 'month' time unit.
month_axis = alt.X(
    'date:N',
    axis=alt.Axis(title='Month of the Year'),
    timeUnit='month',
)

# Stacked bars: record count on y, split by the colour encoding.
chart = alt.Chart(weather).mark_bar().encode(
    color=weather_color,
    x=month_axis,
    y='count(*):Q',
)
Example #12
0
def load_dataset(name):
    """Load the dataset called ``name`` through ``vega_datasets.data``.

    The import happens inside the function, so ``vega_datasets`` is only
    required when a dataset is actually requested.
    """
    from vega_datasets import data as _vega_data
    return _vega_data(name)
Example #13
0
def animate(i):
    """Animation callback: print the frame index, clear ``ax`` and redraw.

    Returns whatever ``plot()`` returns.
    """
    print(i)
    ax.clear()
    redrawn = plot()
    return redrawn
# Run ``animate`` for frames 0-9 on ``fig``, then display the figure.
anim = FuncAnimation(
    fig,
    animate,
    frames=range(10),
    interval=30,
)
plt.show()

###

from sklearn.linear_model import LinearRegression
from pydataset import data
import textwrap as tw

fig, ax = plt.subplots()
ax.set(xlim=(0, 45), ylim=(0, 8))

# Fit tip ~ total_bill on a fixed-seed sample of eight rows.
tips = data('tips').sample(8, random_state=456)
x, y = tips.total_bill, tips.tip
lm = LinearRegression().fit(tips[['total_bill']], y)

# Slope and intercept of the fitted line, and its predictions for ``x``.
m, b = lm.coef_[0], lm.intercept_
yhat = m * x + b
def animate(i):
    """Draw animation frame ``i`` on ``ax``.

    Frame 0 clears the axes and plots the actual points; frame 1 retitles the
    axes with the fitted equation and draws the prediction line. Any other
    frame index does nothing.
    """
    if i == 0:
        ax.clear()
        ax.plot(x, y, ls='', marker='o', label='actual ($y$)', color='black')
        ax.set(xlim=(0, 45), ylim=(0, 8), title='Our orignal X and Y data')
        ax.legend(loc='upper left')
        return

    if i == 1:
        equation_title = 'Add our line to make predictions, y = {:.2f}x + {:.2f}'.format(m, b)
        ax.set(title=equation_title)
        ax.plot(x, yhat, color='grey', label='')
def load_dataset(name):
    """Load a dataset by name as a pandas.DataFrame.

    Emits a ``DeprecationWarning`` carrying ``MSG`` on every call, then
    delegates to ``vega_datasets.data``.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this wrapper, so users see which of their calls is deprecated.
    warnings.warn(MSG, DeprecationWarning, stacklevel=2)
    import vega_datasets
    return vega_datasets.data(name)