def make_charts(df, color, x_axis_title, y_axis_title, title):
    """Build a titled bar chart from ``df`` and return it serialized as JSON.

    The ``name`` field is encoded on the x axis (with ``sort=None``) and the
    ``data`` field on the y axis. ``height`` and ``width`` are not parameters;
    they are resolved from the enclosing scope.
    """
    bars = Chart(data=df, height=height, width=width).mark_bar(color=color)
    x_channel = X('name', axis=Axis(title=x_axis_title), sort=None)
    y_channel = Y('data', axis=Axis(title=y_axis_title))
    encoded = bars.encode(x_channel, y_channel)
    return encoded.properties(title=title).to_json()
def data_bar_jail():
    """Return the layered jail / pre-trial bar chart for the session's county as JSON.

    The county and state are read from the session; the rows come from
    ``read_county_from_db``.
    """
    county_name = session.get('current_county')
    county_data = read_county_from_db(session.get('current_state'), county_name)

    # Total jail population bars.
    jail_tooltip = [
        alt.Tooltip('year', title='Year'),
        alt.Tooltip('total_jail_pop', title='Total jail population'),
    ]
    jail = (
        Chart(data=county_data, height=HEIGHT, width=WIDTH)
        .mark_bar(color='#444760')
        .encode(
            X('year:O', axis=Axis(title='Year')),
            Y('total_jail_pop', axis=Axis(title='Total Jail Population')),
            tooltip=jail_tooltip,
        )
        .properties(title='Jail population in {}'.format(county_name))
        .interactive()
    )

    # Pre-trial bars, drawn on top of the total bars.
    pre_trial_tooltip = [
        alt.Tooltip('year', title='Year'),
        alt.Tooltip('total_jail_pretrial', title='Pre-trial jail population'),
    ]
    pre_trial = (
        Chart(data=county_data, height=HEIGHT, width=WIDTH)
        .mark_bar(color="#d66241", interpolate='step-after', line=True)
        .encode(
            X('year:O', axis=Axis(title='Year')),
            Y('total_jail_pretrial', axis=Axis(title='Number of inmates')),
            tooltip=pre_trial_tooltip,
        )
        .properties(
            title='Pre-trial jail population in {}'.format(county_name))
        .interactive()
    )

    layered = alt.layer(jail + pre_trial)
    return layered.to_json()
def data_bar_jail():
    """Return the layered jail / pre-trial bar chart for the session's county as JSON.

    Two label columns (``total_jail_pop_label`` and
    ``total_jail_pretrial_label``) are added to the county data so the
    tooltips can show thousands-separated whole numbers.
    """
    def format_count(value):
        # Format explicitly with zero decimals instead of slicing the last two
        # characters off "{:,}": slicing only worked for floats ending in
        # ".0", chopped digits off integers and turned NaN into "n".
        if value is None or value != value:  # NaN is never equal to itself
            return ''
        return '{:,.0f}'.format(value)

    county_name = session.get('current_county')
    county_data = read_county_from_db(
        session.get('current_state'), county_name)

    # Human-readable labels used only by the tooltips.
    county_data['total_jail_pop_label'] = (
        county_data['total_jail_pop'].apply(format_count))
    county_data['total_jail_pretrial_label'] = (
        county_data['total_jail_pretrial'].apply(format_count))

    # Total jail population bars.
    jail = Chart(data=county_data, height=HEIGHT, width=WIDTH).mark_bar(
        color='#444760',
    ).encode(
        X('year:O', axis=Axis(title='Year')),
        Y('total_jail_pop', axis=Axis(title='Total Jail Population')),
        tooltip=[
            alt.Tooltip('year', title='Year'),
            alt.Tooltip('total_jail_pop_label',
                        title='Total jail population'),
        ],
    ).properties(
        title='Jail population in {}'.format(county_name),
    ).interactive()

    # Pre-trial bars, drawn on top of the total bars.
    pre_trial = Chart(data=county_data, height=HEIGHT, width=WIDTH).mark_bar(
        color="#d66241",
        interpolate='step-after',
        line=True,
    ).encode(
        X('year:O', axis=Axis(title='Year')),
        Y('total_jail_pretrial', axis=Axis(title='Number of inmates')),
        tooltip=[
            alt.Tooltip('year', title='Year'),
            alt.Tooltip('total_jail_pretrial_label',
                        title='Pre-trial jail population'),
        ],
    ).properties(
        title='Pre-trial jail population in {}'.format(county_name),
    ).interactive()

    chart = alt.layer(jail + pre_trial)
    return chart.to_json()
def data_line():
    """Return a green, interactive line chart of ``sample_data.df_list`` as JSON."""
    base = Chart(data=sample_data.df_list, height=HEIGHT, width=WIDTH)
    line = base.mark_line(color="green")
    encoded = line.encode(
        X("name", axis=Axis(title="Sample")),
        Y("data", axis=Axis(title="Value")),
    )
    return encoded.interactive().to_json()
def county_scatter():
    """Return a scatter chart of prison population vs. county population as JSON.

    All counties of the session's state are drawn as circles and the session's
    county is highlighted with a square marker. The year plotted is 2016
    unless 2015 has a higher ``PercentNotNull`` value according to
    ``test_nulls_for_year``.
    """
    state_name = session.get('current_state')
    county_name = session.get('current_county')

    conn = sqlite3.connect('./db/incarceration.db')
    try:
        # Determine whether 2015 or 2016 has more data.
        year_2016_nulls = test_nulls_for_year(2016, state_name, conn)
        year_2015_nulls = test_nulls_for_year(2015, state_name, conn)
        year = 2016  # default year
        if (year_2016_nulls.iloc[0]['PercentNotNull']
                < year_2015_nulls.iloc[0]['PercentNotNull']):
            year = 2015

        # The state and county names come from the session, so they are bound
        # as query parameters rather than interpolated into the SQL text.
        all_counties_prison_pop = pd.read_sql_query(
            """SELECT county_name, total_pop, total_prison_pop, urbanicity
               FROM incarceration
               WHERE state = ? AND year = ?;""",
            conn,
            params=(state_name, year),
        )
        county_prison_pop = pd.read_sql_query(
            """SELECT county_name, total_pop, total_prison_pop, urbanicity
               FROM incarceration
               WHERE state = ? AND county_name = ? AND year = ?;""",
            conn,
            params=(state_name, county_name, year),
        )
    finally:
        # Close the connection even when a query raises.
        conn.close()

    state_chart = Chart(
        data=all_counties_prison_pop, height=HEIGHT, width=WIDTH,
    ).mark_circle(size=70).encode(
        X('total_pop', axis=Axis(title='County population')),
        Y('total_prison_pop', axis=Axis(title='Total prison population')),
        color=alt.Color('urbanicity', legend=alt.Legend(title='Urbanicity')),
        # The size channel uses alt.Size (the original passed alt.Color here).
        size=alt.Size('total_pop',
                      legend=alt.Legend(title='Total population')),
        tooltip=[
            alt.Tooltip('county_name', title='County'),
            alt.Tooltip('total_pop', title='Total county population'),
            alt.Tooltip('total_prison_pop', title='Total prison population'),
        ],
    ).properties(
        title='Statewide prison population {}, {}'.format(year, state_name),
    ).interactive()

    county_chart = Chart(
        data=county_prison_pop, height=HEIGHT, width=WIDTH,
    ).mark_square(
        size=250, fillOpacity=0.5, stroke='black', color='black',
    ).encode(
        X('total_pop', axis=Axis(title='County population')),
        Y('total_prison_pop', axis=Axis(title='Total prison population')),
        tooltip=['county_name', 'total_pop', 'total_prison_pop'],
    ).interactive()

    chart = alt.layer(county_chart, state_chart)
    return chart.to_json()
def data_waterfall():
    """Return a gray, interactive bar chart of ``sample_data.df_water`` as JSON."""
    base = Chart(data=sample_data.df_water, width=WIDTH)
    bars = base.mark_bar(color="gray")
    encoded = bars.encode(
        X("Name", axis=Axis(title="Sample")),
        Y("Value", axis=Axis(title="Value")),
    )
    return encoded.interactive().to_json()
def gen_var_barcharts_by_geo(data, var, agg_type, geo):
    """Write a bar chart of ``var`` per ``geo`` to ``./runs/<var>_by_<geo>.json``.

    ``data`` is first passed through ``aggregate_data(data, agg_type, geo)``.
    The x-axis title is built from ``agg_type`` and the part of ``var`` after
    its first underscore, so ``var`` must contain an underscore.
    """
    aggregated = aggregate_data(data, agg_type, geo)
    var_label = var.split('_', 1)[1].replace('_', ' ')
    x_channel = X(var, axis=Axis(title=agg_type + ' of ' + var_label))
    y_channel = Y((geo + ':O'), axis=Axis(title=geo.replace('_', ' ')))
    bar_chart = alt.Chart(aggregated).mark_bar().encode(
        x=x_channel, y=y_channel)

    out_path = './runs/%s_by_%s.json' % (var, geo)
    with open(out_path, 'w') as outfile:
        # NOTE(review): to_json() already returns a JSON string, so the file
        # holds a JSON-encoded string rather than a JSON object -- confirm
        # that readers expect this.
        json.dump(bar_chart.to_json(), outfile)
def gen_custom_barchart(table, var):
    """Write a bar chart of row counts per ``var`` to ``./runs/<table>_by_<var>.json``.

    The orca table ``table`` is grouped by ``var``; the ``parcel_id`` count of
    each group is stored in a column named ``count_<table>`` and plotted on
    the x axis.
    """
    count_col = 'count_' + table
    frame = orca.get_table(table).to_frame(['parcel_id', var])
    counts = frame.groupby(var).count().reset_index()
    counts = counts.rename(columns={'parcel_id': count_col})

    chart = alt.Chart(counts).mark_bar().encode(
        x=X(count_col, axis=Axis(title=count_col)),
        y=Y(var + ':O', axis=Axis(title=var)),
    )
    out_path = './runs/%s_by_%s.json' % (table, var)
    with open(out_path, 'w') as outfile:
        json.dump(chart.to_json(), outfile)
def gen_barcharts_n_largest(data, var, agg_type, geo, n):
    """Write a bar chart of the ``n`` largest ``var`` values per ``geo``.

    ``data`` is aggregated with ``aggregate_data(data, agg_type, geo)`` and
    the ``n`` rows with the largest ``var`` are charted. Output goes to
    ``./runs/<n>_<geo>s_with_max_<var>.json``.
    """
    aggregated = aggregate_data(data, agg_type, geo)
    top_rows = aggregated.nlargest(n, var).reset_index()

    var_label = var.split('_', 1)[1].replace('_', ' ')
    x_channel = X(var, axis=Axis(title=agg_type + ' of ' + var_label))
    y_channel = Y(geo + ':O', axis=Axis(title=geo.replace('_', ' ')))
    bar_chart = alt.Chart(top_rows).mark_bar().encode(
        x=x_channel, y=y_channel)

    out_path = './runs/%s_%ss_with_max_%s.json' % (n, geo, var)
    with open(out_path, 'w') as outfile:
        json.dump(bar_chart.to_json(), outfile)
def data_bar_prison():
    """Return a bar chart of the session county's prison population by year as JSON."""
    county_name = session.get('current_county')
    county_data = read_county_from_db(session.get('current_state'), county_name)

    bars = Chart(data=county_data, height=HEIGHT, width=WIDTH).mark_bar(
        color='#2f3142')
    encoded = bars.encode(
        X('year:O', axis=Axis(title='Year')),
        Y('total_prison_pop', axis=Axis(title='Total Prison Population')),
    )
    chart = encoded.properties(
        title='Prison population in {}'.format(county_name))
    return chart.to_json()
def pretrial_jail_chart():
    """Return an interactive step-line chart of pre-trial jail population as JSON.

    The state and county are taken from the session.
    """
    county_name = session.get('current_county')
    county_data = read_county_from_db(session.get('current_state'), county_name)

    line = Chart(data=county_data, height=HEIGHT, width=WIDTH).mark_line(
        color="#08080B",
        interpolate='step-after',
        line=True,
    )
    encoded = line.encode(
        X('year:O', axis=Axis(title='Year')),
        Y('total_jail_pretrial', axis=Axis(title='Number of inmates')),
        tooltip=['year', 'total_jail_pretrial'],
    )
    chart = encoded.properties(
        title='Pre-trial jail population in {}'.format(county_name),
    ).interactive()
    return chart.to_json()
def create_bins(data):
    """Write one bar chart per weather measure to ``templates/plot<N>.html``.

    For each measure column the per-``Location`` mean is computed and cut
    into three equal-width bins labelled Low / Moderate / High. The mean is
    plotted per location and coloured by its bin. Files are numbered from 1
    in the order of the measure list.
    """
    columns = ['Wind Speed (kts)', 'Rain (mm)', 'Temp (◦C)', 'Humidity (%)']
    for filenumber, items in enumerate(columns, start=1):
        means = data.groupby(['Location'])[items].mean()
        bins = pd.cut(means, 3, labels=['Low', 'Moderate', 'High'])

        # The legend label is the measure name up to its unit, e.g. "Rain ".
        legend = items[:items.index('(')]
        mean_col = 'Average ' + items + ' Per Station'
        bin_col = 'Average ' + legend + 'Category '

        # Both series share the Location index, so a plain column-wise concat
        # aligns them. The original passed ``join_axes``, which pandas 1.0
        # removed, and renamed columns by mutating ``columns.values``.
        new_df = pd.concat(
            [means.rename(mean_col), bins.rename(bin_col)], axis=1,
        ).reset_index()

        graph = alt.Chart(new_df).mark_bar().encode(
            x='Location',
            # https://github.com/altair-viz/altair/issues/191
            y=Y(mean_col, axis=Axis(format='f')),
            color=bin_col)
        graph.savechart('templates/plot' + str(filenumber) + '.html')
def gen_var_histograms(data, var, agg_type, geo, vdict, cdict):
    """Write a histogram of (log) ``var`` to ``./runs/<var>_histogram.json``.

    ``data`` is aggregated with ``aggregate_data(data, agg_type, geo)``. When
    the first word of ``vdict[var]`` is ``'Log'`` the column is binned as is;
    otherwise its natural log is stored in ``log_<var>`` and binned. ``cdict``
    is accepted but not used here.
    """
    frame = aggregate_data(data, agg_type, geo).copy()

    first_word = vdict[var].split(' ')[0]
    if first_word == 'Log':
        log_var = var
        frame[log_var] = frame[var]
    else:
        log_var = 'log_' + var
        frame[log_var] = np.log(frame[var])

    x_title = 'log of ' + var.split('_', 1)[1].replace('_', ' ')
    y_title = 'number of ' + geo.replace('_', ' ') + 's'
    hist = alt.Chart(frame).mark_bar().encode(
        alt.X(log_var, bin=True, axis=Axis(title=x_title)),
        alt.Y('count()', axis=Axis(title=y_title)),
    )
    out_path = './runs/%s_histogram.json' % var
    with open(out_path, 'w') as outfile:
        json.dump(hist.to_json(), outfile)
def gen_var_scatters(data, var1, var2, agg1, agg2, geo_points, geo_large):
    """Write a scatter of ``var2`` against ``var1`` to ``./runs/<var2>_vs_<var1>.json``.

    Each point is one ``geo_points`` unit. ``var1`` and ``var2`` are
    aggregated separately (with ``agg1`` and ``agg2``) and joined on
    ``geo_points``. Points are coloured by ``geo_large``, taken from the
    per-group minimum of the raw data.
    """
    # One geo_large value per geo_points unit, used for colouring.
    color_lookup = data.groupby(geo_points).min().reset_index()
    color_lookup = color_lookup[[geo_points, geo_large]]

    x_values = aggregate_data(data, agg1, geo_points)[[var1, geo_points]]
    y_values = aggregate_data(data, agg2, geo_points)[[var2, geo_points]]
    merged = pd.merge(x_values, y_values, on=geo_points, how='left')
    merged = pd.merge(merged, color_lookup, on=geo_points, how='left')

    x_label = var1.split('_', 1)[1].replace('_', ' ')
    y_label = var2.split('_', 1)[1].replace('_', ' ')
    titlex = agg1 + ' of ' + x_label + ' by zone'
    titley = agg2 + ' of ' + y_label + ' by zone'

    scatter = alt.Chart(merged).mark_point().encode(
        x=X(var1, axis=Axis(title=titlex)),
        y=Y(var2, axis=Axis(title=titley)),
        color=geo_large + ':N',
    )
    out_path = './runs/%s_vs_%s.json' % (var2, var1)
    with open(out_path, 'w') as outfile:
        json.dump(scatter.to_json(), outfile)
def data_bar_prison():
    """Return an interactive bar chart of the county's prison population as JSON.

    A ``total_prison_pop_label`` column is added to the county data so the
    tooltip can show a thousands-separated whole number.
    """
    def format_count(value):
        # Format explicitly with zero decimals instead of slicing the last two
        # characters off "{:,}": slicing only worked for floats ending in
        # ".0", chopped digits off integers and turned NaN into "n".
        if value is None or value != value:  # NaN is never equal to itself
            return ''
        return '{:,.0f}'.format(value)

    county_name = session.get('current_county')
    county_data = read_county_from_db(
        session.get('current_state'), county_name)

    # Human-readable label used only by the tooltip.
    county_data['total_prison_pop_label'] = (
        county_data['total_prison_pop'].apply(format_count))

    chart = Chart(data=county_data, height=HEIGHT, width=WIDTH).mark_bar(
        color='#2f3142',
    ).encode(
        X('year:O', axis=Axis(title='Year')),
        Y('total_prison_pop', axis=Axis(title='Total Prison Population')),
        tooltip=[
            alt.Tooltip('year', title='Year'),
            alt.Tooltip('total_prison_pop_label',
                        title='Total prison population'),
        ],
    ).properties(
        title='Prison population in {}'.format(county_name),
    ).interactive()
    return chart.to_json()
def _encode_fields(self, xfield, yfield, time_unit=None, scale=Scale(zero=False)):
    """
    Build the Altair X and Y encodings for a pair of field descriptors.

    ``xfield`` and ``yfield`` are indexable: item 1 is passed to ``X``/``Y``
    as the field shorthand and the optional item 2 is a dict of ``Axis``
    keyword arguments. The Y encoding always receives ``scale``. The X
    encoding receives ``scale`` only when axis options are given, and
    ``timeUnit`` only when ``time_unit`` is not None.

    Returns the tuple ``(xencode, yencode)``.
    """
    if scale is None:
        scale = Scale()

    x_shorthand = xfield[1]
    y_shorthand = yfield[1]
    x_options = xfield[2] if len(xfield) > 2 else None
    y_options = yfield[2] if len(yfield) > 2 else None

    # Collect only the keyword arguments that apply to the X channel.
    x_kwargs = {}
    if x_options is not None:
        x_kwargs['axis'] = Axis(**x_options)
        x_kwargs['scale'] = scale
    if time_unit is not None:
        x_kwargs['timeUnit'] = time_unit
    xencode = X(x_shorthand, **x_kwargs)

    y_kwargs = {'scale': scale}
    if y_options is not None:
        y_kwargs['axis'] = Axis(**y_options)
    yencode = Y(y_shorthand, **y_kwargs)

    return xencode, yencode
def color_heatmap(data, row, column, column_to_color, colormap_domain,
                  colormap_range, cellsize=(10, 10)):
    """Create an Altair/vega-lite heat map with an explicit color scale.

    Parameters
    ----------
    data : dataframe to display, or url of csv file
    row, column, column_to_color : str
        Altair trait shorthands for the row facet, the column facet and the
        field that drives the cell color.
    colormap_domain : list
        Passed as the color scale ``domain``. Only its first and last
        entries are shown as legend values.
    colormap_range : list
        Passed as the color scale ``range`` (presumably the colors matching
        ``colormap_domain`` -- confirm with callers).
    cellsize : tuple
        ``cellsize[0]`` is used as ``textBandWidth`` and ``cellsize[1]`` as
        ``bandSize``, in pixels.
    """
    # Show only the two ends of the domain in the legend to avoid a blob of
    # overlapping labels.
    legend = Legend(
        orient='right',
        title=column_to_color,
        values=[colormap_domain[0], colormap_domain[-1]],
        labelAlign='left',
    )
    color_channel = Color(
        column_to_color,
        legend=legend,
        scale=Scale(domain=colormap_domain, range=colormap_range),
    )
    column_channel = Column(
        column,
        axis=Axis(labelAngle=270.0, orient='bottom', title=column),
    )

    # Blank text marks with the color applied to the background act as cells.
    cells = Chart(data).mark_text(applyColorToBackground=True)
    encoded = cells.encode(
        color=color_channel,
        column=column_channel,
        row=row,
        text=Text(value=' '),
    )
    return encoded.configure_scale(
        textBandWidth=cellsize[0], bandSize=cellsize[1])
def data_waterfall():
    """Return a light-green bar chart of ``data.df_water`` as JSON."""
    bars = Chart(data.df_water).mark_bar(color='lightgreen')
    x_channel = X('Name', axis=Axis(title='Sample'))
    y_channel = Y('Value', axis=Axis(title='Value'))
    return bars.encode(x_channel, y_channel).to_json()
def data_bar():
    """Return a yellow, interactive bar chart of ``sample_data.df_list`` as JSON."""
    base = Chart(data=sample_data.df_list, height=HEIGHT, width=WIDTH)
    bars = base.mark_bar(color='yellow')
    encoded = bars.encode(
        X('name', axis=Axis(title='Sample')),
        Y('data', axis=Axis(title='Value')),
    )
    return encoded.interactive().to_json()
def data_waterfall():
    """Return a gray, interactive bar chart of ``sample_data.df_water`` as JSON."""
    bars = Chart(sample_data.df_water).mark_bar(color='gray')
    encoded = bars.encode(
        X('Name', axis=Axis(title='Sample')),
        Y('Value', axis=Axis(title='Value')),
    )
    return encoded.interactive().to_json()
# Transform the temperature column with ``f`` (defined outside this fragment).
data['temp'] = data['temp'].apply(f)

# Output file names are derived from the second command-line argument.
json_filename = sys.argv[2] + '.json'
csv_filename = sys.argv[2] + '.csv'  # only needed if the CSV export is enabled
data.to_json(path_or_buf=json_filename, orient='records', date_format='iso')

# Ten-step color list; it is reversed below so the last entry comes first.
colors = [
    "#67001f", "#b2182b", "#d6604d", "#f4a582", "#fddbc7",
    "#d1e5f0", "#92c5de", "#4393c3", "#2166ac", "#053061"
]
colors = colors[::-1]

# NOTE(review): ``d`` is not referenced again in this fragment; the color
# scale below spells out the same [0, 120] domain inline.
d = [0, 120]

# Rows are faceted by hour of day and columns by month/date of ``dt``.
r = Row('dt:T', timeUnit='hours', axis=Axis(title='Hour of day'))
c = Column('dt:T',
           timeUnit='monthdate',
           axis=Axis(format=u'%b', labels=False, title='Month'))

# Temperature is binned in steps of 12 and mapped onto the reversed colors.
col = Color(
    'temp:N',
    bin=Bin(step=12),
    scale=Scale(domain=[0, 120], range=colors, clamp=True, zero=True),
    legend=Legend(title="Temperature", format=u'.0f'))

# Text marks with a colored background serve as the heat-map cells.
chart = Chart(data).mark_text(applyColorToBackground=True).encode(
    row=r, column=c, text=Text('blanks'),
    color=col).configure_scale(textBandWidth=3, bandSize=25)

# NOTE(review): presumably raises the row limit to fit one year of hourly
# records (8760 + 1) -- confirm.
chart.max_rows = 8761
def multiline():
    """Return stacked white/black population-ratio charts for the county as JSON.

    The general-population chart is always produced. The jail and prison
    charts are added below it when the session flags ``jail_data_exists``
    and ``prison_data_exists`` are truthy. With more than one chart the
    result is a vertical concatenation in the order population, jail, prison.
    """
    county_data = read_county_from_db(
        session.get('current_state'), session.get('current_county'))
    source = helper_functions.process_data(county_data)

    # Label column; it stays in the data that the charts embed.
    source['value_label'] = source['value'].apply(
        lambda x: helper_functions.round_non_null_nums(x))

    demographics = ['Total white population (15-64)',
                    'Total black population (15-64)',
                    'White jail population', 'Black jail population',
                    'White prison population', 'Black prison population']
    # Color pairs matched, in order, to the demographics above.
    hex_colors = ['#cccec1', '#272727', '#cccec1', '#272727',
                  '#cccec1', '#272727']
    demographic_labels = dict(zip(demographics, hex_colors))

    # Read each session flag once so chart creation and concatenation agree.
    has_jail = session.get('jail_data_exists')
    has_prison = session.get('prison_data_exists')

    charts = [_race_ratio_chart(
        source,
        ['perc_white_total_pop', 'perc_black_total_pop'],
        'Ratio of white/black residents in total county population (15-64)',
        demographic_labels,
    )]
    if has_jail:
        charts.append(_race_ratio_chart(
            source,
            ['perc_white_jail_pop', 'perc_black_jail_pop'],
            'Ratio of white/black inmates in jail population',
            demographic_labels,
        ))
    if has_prison:
        charts.append(_race_ratio_chart(
            source,
            ['perc_white_prison_pop', 'perc_black_prison_pop'],
            'Ratio of white/black inmates in prison population',
            demographic_labels,
        ))

    # A single chart is returned as is; several are stacked vertically.
    chart = charts[0] if len(charts) == 1 else alt.vconcat(*charts)
    return chart.to_json()


def _race_ratio_chart(source, variables, title, demographic_labels):
    """Build one normalized stacked bar chart for the rows whose ``variable`` is in ``variables``."""
    subset = source[source['variable'].isin(variables)]
    color_scale = alt.Scale(
        domain=list(demographic_labels.keys()),
        range=list(demographic_labels.values()),
    )
    return alt.Chart(subset, height=150, width=500).mark_bar().encode(
        x=alt.X("year:O", axis=Axis(title='Year')),
        y=alt.Y("value:Q", stack="normalize", axis=Axis(title='Ratio')),
        color=alt.Color('demographic:N', legend=None, scale=color_scale),
    ).properties(title=title)
import pandas as pd
import numpy as np
from altair import Chart, X, Y, SortField, Detail, Axis

# Load the dropped-frames export, parsing both timestamp columns as dates.
csv_path = "../data/dropped-frames.csv"
df = pd.read_csv(
    csv_path,
    parse_dates=["Dropped Frame Start", "Dropped Frame End"],
    low_memory=False,
)

# Keep only the columns of interest; the start time becomes "Timestamp".
data = df[[
    'Officer ID', 'Dropped Frame Start', 'Duration', 'FPS',
    'Dropped Frames', 'Resolution', 'File Size', 'File Name',
    'Frame Range', 'Player Time Range',
]]
data = data.rename(columns={'Dropped Frame Start': 'Timestamp'})

## Overview
# Line chart of summed Duration per year-month for a random sample of 100
# rows, saved to test.svg.
Chart(data.sample(100)).configure_axis(
    gridColor='#ccc',
).mark_line(
    interpolate='linear',
).encode(
    X(
        field='Timestamp',
        type='temporal',
        timeUnit='yearmonth',
        axis=Axis(
            title=' ',
            ticks=6,
            labelAngle=0,
            tickSizeEnd=0,
            tickSize=0,
            tickPadding=10,
        ),
    ),
    Y('sum(Duration)', axis=Axis(title='Seconds lost')),
).savechart('test.svg')
# Scatter plot of the daily changes against the positions 0..999.
# ``day_change`` is defined outside this fragment.
x_vals = range(0, 1000)
y_vals = []
for price in day_change:
    y_vals.append(price)
data = Data(X=x_vals, Y=y_vals)
chart = Chart(data)
mark = chart.mark_point()
enc = mark.encode(x='X', y='Y')
enc.display()

# Histogram of the daily changes (binned change vs. count).
data = Data(change=day_change)
chart = Chart(data)
mark = chart.mark_bar()
# NOTE(review): these assignments rebind the names X and Y to Axis objects
# for the rest of the script.
X = Axis('change:Q', bin=True)
Y = Axis('count()')
enc = mark.encode(x=X, y=Y)
enc.display()

# Summary statistics of the daily changes.
print(mean(day_change))
print(stdev(day_change))

# predictions
# NOTE(review): the start price presumably comes from lines2[999][4]
# (306.730011) -- confirm.
change = 0
change_guess = []
start_price = 306.730011
worst = 0
def data_line():
    """Return a line chart of ``data.df_list`` as JSON."""
    line = Chart(data=data.df_list, height=HEIGHT, width=WIDTH).mark_line()
    x_channel = X('name', axis=Axis(title='Sample'))
    y_channel = Y('data', axis=Axis(title='Value'))
    return line.encode(x_channel, y_channel).to_json()
def chartLog():
    "Display chart for selected log"
    db_folder = app.config['UPLOAD_FOLDER']
    logFiles = glob.glob('%s/*.db' % db_folder)
    form = ChartLog()
    form.logFile.choices = [(f, f) for f in logFiles]
    form.chartId.choices = [(q['id'], q['id']) for q in queries.graphs]

    # Preselect the application's current database when it is set and still
    # exists. This stays best-effort, but no longer swallows every exception.
    current_db = getattr(app, 'dbname', None)
    if isinstance(current_db, str) and os.path.exists(current_db):
        form.logFile.data = current_db

    if not form.validate_on_submit():
        return render_template('chartLog.html', chart={}, dbName=None,
                               form=form)

    dbname = os.path.join(form.logFile.data)
    if not os.path.exists(dbname):
        flash('Database does not exist', 'error')
        return render_template('error.html', title='Database error')

    try:
        conn = sqlite3.connect(dbname)
    except Exception as e:
        app.logger.error(traceback.format_exc())
        flash('Error: %s' % (str(e)), 'error')
        return render_template('error.html',
                               title='Error in database reporting')

    # From here on the connection is open; close it on every exit path.
    try:
        chartId = form.chartId.data
        charts = [q for q in queries.graphs if q['id'] == chartId]
        if not charts:
            flash("Error: logic error couldn't find chartId", 'error')
            return render_template(
                'error.html',
                title='Error in in configuration of chart reports')

        q = charts[0]
        app.logger.debug("running chart query: %s - %s", q['title'], q['sql'])
        start = datetime.now()
        try:
            df = pd.read_sql_query(q['sql'], conn)
        except Exception as e:
            # Log the traceback, as the connection failure above does.
            app.logger.error(traceback.format_exc())
            flash('Error: %s' % (str(e)), 'error')
            return render_template('error.html',
                                   title='Error in database reporting')
        delta = datetime.now() - start
    finally:
        conn.close()

    chart = _build_log_chart(df, q)
    data = {
        'id': "chart",
        'data': chart.to_json(),
        'title': q['title'],
        'explanation': q['explanation'],
        'sql': q['sql'],
        'time_taken': str(delta)
    }
    return render_template('chartLog.html', chart=data, dbName=dbname,
                           form=form)


def _build_log_chart(df, q):
    """Build the chart for query definition ``q``: a line chart when ``graph_type`` is 'line', else a bar chart."""
    base = Chart(data=df, height=HEIGHT, width=WIDTH)
    marked = base.mark_line() if q['graph_type'] == 'line' else base.mark_bar()
    return marked.encode(
        X(q['x']['field'],
          axis=Axis(title=q['x']['title'], labelOverlap='greedy')),
        Y(q['y']['field'], axis=Axis(title=q['y']['title'])))
# Derive the team column with ``news`` (defined outside this fragment).
result['Time'] = result.apply(lambda row: news(row) , axis=1)

# Keep round, season, team and result, ordered by round.
df_dados = result.loc[:][['Rodada','Temporada','Time','Resultado']].copy()
df_dados.sort_values(by=['Rodada'] ,ascending=True , inplace=True)

# Running total within each (season, team) group.
df_dados['Acumulado'] = df_dados.groupby(['Temporada','Time'])[['Time','Resultado']].cumsum()

# Let the user pick a season (2012-2019, default 2012) and show its rows.
x = st.slider('Selecione o ano',2012, 2019, (2012))
df_dados = df_dados[df_dados['Temporada'] == x].reset_index(drop=True)
st.dataframe(df_dados)

# NOTE(review): the encodings reference fields named '2' and '0', which are
# not among the columns selected above -- confirm the intended fields.
bars = alt.Chart(df_dados).mark_bar().encode(
    x=X('2:Q',axis=Axis(title='Brasileirao')),
    y=Y('0:Q',axis=Axis(title='Times'))
).properties(
    width=650,
    height=400
)
bar_plot = st.altair_chart(bars)


# NOTE(review): this function builds a chart but does not return it, so
# callers receive None. The source layout was lost, so the extent of the
# function body is assumed here -- confirm against the original file.
def plot_bar_animated_altair(df,week):
    bars = alt.Chart(df, title="Ranking por Rodada :"+week)


# Iterate over rounds 1..38, filtering the data for each round.
# NOTE(review): 'Rodada' is compared against the round number as a string,
# and the result is never passed to ``bar_plot`` -- confirm the intent.
for week in range(1,39):
    teste = str(week)
    bars = plot_bar_animated_altair(df_dados[df_dados['Rodada']== teste],teste)
    time.sleep(0.01)